Skip to content

Commit 8feb1a1

Browse files
Implement int_format_into feature
1 parent 86d0aef commit 8feb1a1

File tree

3 files changed

+282
-42
lines changed

3 files changed

+282
-42
lines changed

library/core/src/fmt/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ mod float;
1515
#[cfg(no_fp_fmt_parse)]
1616
mod nofloat;
1717
mod num;
18+
mod num_buffer;
1819
mod rt;
1920

2021
#[stable(feature = "fmt_flags_align", since = "1.28.0")]
@@ -33,6 +34,9 @@ pub enum Alignment {
3334
Center,
3435
}
3536

37+
#[unstable(feature = "int_format_into", issue = "138215")]
38+
pub use num_buffer::{NumBuffer, NumBufferTrait};
39+
3640
#[stable(feature = "debug_builders", since = "1.2.0")]
3741
pub use self::builders::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple};
3842
#[unstable(feature = "debug_closure_helpers", issue = "117729")]

library/core/src/fmt/num.rs

Lines changed: 218 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Integer and floating-point number formatting
22
3+
use crate::fmt::NumBuffer;
34
use crate::mem::MaybeUninit;
45
use crate::num::fmt as numfmt;
56
use crate::ops::{Div, Rem, Sub};
@@ -199,6 +200,17 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"\
199200
6061626364656667686970717273747576777879\
200201
8081828384858687888990919293949596979899";
201202

203+
/// Converts the tail of `buf`, starting at index `offset`, into a `&str`.
204+
///
205+
/// Safety notes: `offset` MUST BE at most `buf.len()`, and every byte of `buf` from index
206+
/// `offset` onwards MUST BE initialized and MUST BE an ascii character.
207+
unsafe fn slice_buffer_to_str(buf: &[MaybeUninit<u8>], offset: usize) -> &str {
208+
// SAFETY: The caller guarantees `offset <= buf.len()` and that this tail is initialized.
209+
let written = unsafe { buf.get_unchecked(offset..) };
210+
// SAFETY: The caller guarantees the tail only holds ASCII, which is always valid UTF-8.
211+
unsafe { str::from_utf8_unchecked(written.assume_init_ref()) }
212+
}
213+
202214
macro_rules! impl_Display {
203215
($($signed:ident, $unsigned:ident,)* ; as $u:ident via $conv_fn:ident named $gen_name:ident) => {
204216

@@ -248,6 +260,12 @@ macro_rules! impl_Display {
248260
issue = "none"
249261
)]
250262
pub fn _fmt<'a>(self, buf: &'a mut [MaybeUninit::<u8>]) -> &'a str {
263+
let offset = self._fmt_inner(buf);
264+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
265+
unsafe { slice_buffer_to_str(buf, offset) }
266+
}
267+
268+
fn _fmt_inner(self, buf: &mut [MaybeUninit::<u8>]) -> usize {
251269
// Count the number of bytes in buf that are not initialized.
252270
let mut offset = buf.len();
253271
// Consume the least-significant decimals from a working copy.
@@ -309,24 +327,99 @@ macro_rules! impl_Display {
309327
// not used: remain = 0;
310328
}
311329

312-
// SAFETY: All buf content since offset is set.
313-
let written = unsafe { buf.get_unchecked(offset..) };
314-
// SAFETY: Writes use ASCII from the lookup table exclusively.
315-
unsafe {
316-
str::from_utf8_unchecked(slice::from_raw_parts(
317-
MaybeUninit::slice_as_ptr(written),
318-
written.len(),
319-
))
330+
offset
331+
}
332+
}
333+
334+
impl $signed {
335+
/// Writes the integer, in signed decimal format, into the caller-provided `buf` of type
336+
/// [`NumBuffer`] and returns the written text as a `&str` borrowing from `buf`.
337+
///
338+
/// # Examples
339+
///
340+
/// ```
341+
/// #![feature(int_format_into)]
342+
/// use core::fmt::NumBuffer;
343+
///
344+
#[doc = concat!("let n = 0", stringify!($signed), ";")]
345+
/// let mut buf = NumBuffer::new();
346+
/// assert_eq!(n.format_into(&mut buf), "0");
347+
///
348+
#[doc = concat!("let n1 = 32", stringify!($signed), ";")]
349+
/// let mut buf1 = NumBuffer::new();
350+
/// assert_eq!(n1.format_into(&mut buf1), "32");
351+
///
352+
#[doc = concat!("let n2 = ", stringify!($signed::MAX), ";")]
353+
/// let mut buf2 = NumBuffer::new();
354+
#[doc = concat!("assert_eq!(n2.format_into(&mut buf2), ", stringify!($signed::MAX), ".to_string());")]
355+
/// ```
356+
#[unstable(feature = "int_format_into", issue = "138215")]
357+
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
358+
let mut offset;
359+
360+
#[cfg(not(feature = "optimize_for_size"))]
361+
{
362+
offset = self.unsigned_abs()._fmt_inner(&mut buf.buf);
363+
}
364+
#[cfg(feature = "optimize_for_size")]
365+
{
366+
offset = _inner_slow_integer_to_str(self.unsigned_abs().$conv_fn(), &mut buf.buf);
320367
}
368+
// The digits of the magnitude are written; a negative value also gets a leading `-`.
369+
if self < 0 {
370+
offset -= 1;
371+
buf.buf[offset].write(b'-');
372+
}
373+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
374+
unsafe { slice_buffer_to_str(&buf.buf, offset) }
321375
}
322-
})*
376+
}
377+
378+
impl $unsigned {
379+
/// Writes the integer, in decimal format, into the caller-provided `buf` of type
380+
/// [`NumBuffer`] and returns the written text as a `&str` borrowing from `buf`.
381+
///
382+
/// # Examples
383+
///
384+
/// ```
385+
/// #![feature(int_format_into)]
386+
/// use core::fmt::NumBuffer;
387+
///
388+
#[doc = concat!("let n = 0", stringify!($unsigned), ";")]
389+
/// let mut buf = NumBuffer::new();
390+
/// assert_eq!(n.format_into(&mut buf), "0");
391+
///
392+
#[doc = concat!("let n1 = 32", stringify!($unsigned), ";")]
393+
/// let mut buf1 = NumBuffer::new();
394+
/// assert_eq!(n1.format_into(&mut buf1), "32");
395+
///
396+
#[doc = concat!("let n2 = ", stringify!($unsigned::MAX), ";")]
397+
/// let mut buf2 = NumBuffer::new();
398+
#[doc = concat!("assert_eq!(n2.format_into(&mut buf2), ", stringify!($unsigned::MAX), ".to_string());")]
399+
/// ```
400+
#[unstable(feature = "int_format_into", issue = "138215")]
401+
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
402+
let offset;
403+
404+
#[cfg(not(feature = "optimize_for_size"))]
405+
{
406+
offset = self._fmt_inner(&mut buf.buf);
407+
}
408+
#[cfg(feature = "optimize_for_size")]
409+
{
410+
offset = _inner_slow_integer_to_str(self.$conv_fn(), &mut buf.buf);
411+
}
412+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
413+
unsafe { slice_buffer_to_str(&buf.buf, offset) }
414+
}
415+
}
416+
417+
418+
)*
323419

324420
#[cfg(feature = "optimize_for_size")]
325-
fn $gen_name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
326-
const MAX_DEC_N: usize = $u::MAX.ilog(10) as usize + 1;
327-
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
328-
let mut curr = MAX_DEC_N;
329-
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
421+
fn _inner_slow_integer_to_str(mut n: $u, buf: &mut [MaybeUninit::<u8>]) -> usize {
422+
let mut curr = buf.len();
330423

331424
// SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
332425
// `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
@@ -336,20 +429,25 @@ macro_rules! impl_Display {
336429
unsafe {
337430
loop {
338431
curr -= 1;
339-
buf_ptr.add(curr).write((n % 10) as u8 + b'0');
432+
buf[curr].write((n % 10) as u8 + b'0');
340433
n /= 10;
341434

342435
if n == 0 {
343436
break;
344437
}
345438
}
346439
}
440+
curr // Index of the most significant digit written into `buf`.
441+
}
347442

348-
// SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid UTF-8
349-
let buf_slice = unsafe {
350-
str::from_utf8_unchecked(
351-
slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr))
352-
};
443+
#[cfg(feature = "optimize_for_size")]
444+
fn $gen_name(n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
445+
const MAX_DEC_N: usize = $u::MAX.ilog(10) as usize + 1;
446+
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
447+
448+
let offset = _inner_slow_integer_to_str(n, &mut buf);
449+
// SAFETY: An ASCII digit was written at every index from `offset` to the end of `buf`.
450+
let buf_slice = unsafe { slice_buffer_to_str(&buf, offset) };
353451
f.pad_integral(is_nonnegative, "", buf_slice)
354452
}
355453
};
@@ -579,35 +677,112 @@ impl fmt::Display for i128 {
579677
}
580678
}
581679

582-
/// Format optimized for u128. Computation of 128 bits is limited by proccessing
583-
/// in batches of 16 decimals at a time.
584-
fn fmt_u128(n: u128, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
680+
impl u128 {
681+
/// Writes the integer, in decimal format, into the caller-provided `buf` of type
682+
/// [`NumBuffer`] and returns the written text as a `&str` borrowing from `buf`.
683+
///
684+
/// # Examples
685+
///
686+
/// ```
687+
/// #![feature(int_format_into)]
688+
/// use core::fmt::NumBuffer;
689+
///
690+
/// let n = 0u128;
691+
/// let mut buf = NumBuffer::new();
692+
/// assert_eq!(n.format_into(&mut buf), "0");
693+
///
694+
/// let n1 = 32u128;
695+
/// let mut buf1 = NumBuffer::new();
696+
/// assert_eq!(n1.format_into(&mut buf1), "32");
697+
///
698+
/// let n2 = u128::MAX;
699+
/// let mut buf2 = NumBuffer::new();
700+
/// assert_eq!(n2.format_into(&mut buf2), u128::MAX.to_string());
701+
/// ```
702+
#[unstable(feature = "int_format_into", issue = "138215")]
703+
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
704+
// FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const
705+
// for `fmt_u128_inner`.
706+
//
707+
// In the meantime, we have to use a slice starting at index 1 and add 1 to the returned
708+
// offset to ensure the number is correctly generated at the end of the buffer.
709+
let offset = fmt_u128_inner(self, &mut buf.buf[1..]) + 1;
710+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
711+
unsafe { slice_buffer_to_str(&buf.buf, offset) }
712+
}
713+
}
714+
715+
impl i128 {
716+
/// Writes the integer, in signed decimal format, into the caller-provided `buf` of type
717+
/// [`NumBuffer`] and returns the written text as a `&str` borrowing from `buf`.
718+
///
719+
/// # Examples
720+
///
721+
/// ```
722+
/// #![feature(int_format_into)]
723+
/// use core::fmt::NumBuffer;
724+
///
725+
/// let n = 0i128;
726+
/// let mut buf = NumBuffer::new();
727+
/// assert_eq!(n.format_into(&mut buf), "0");
728+
///
729+
/// let n1 = 32i128;
730+
/// let mut buf1 = NumBuffer::new();
731+
/// assert_eq!(n1.format_into(&mut buf1), "32");
732+
///
733+
/// let n2 = i128::MAX;
734+
/// let mut buf2 = NumBuffer::new();
735+
/// assert_eq!(n2.format_into(&mut buf2), i128::MAX.to_string());
736+
/// ```
737+
#[unstable(feature = "int_format_into", issue = "138215")]
738+
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
739+
// FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const
740+
// for `fmt_u128_inner`.
741+
//
742+
// In the meantime, we have to use a slice starting at index 1 and add 1 to the returned
743+
// offset to ensure the number is correctly generated at the end of the buffer.
744+
let mut offset = fmt_u128_inner(self.unsigned_abs(), &mut buf.buf[1..]) + 1;
745+
// The digits of the magnitude are written; a negative value also gets a leading `-`.
746+
if self < 0 {
747+
offset -= 1;
748+
buf.buf[offset].write(b'-');
749+
}
750+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
751+
unsafe { slice_buffer_to_str(&buf.buf, offset) }
752+
}
753+
}
754+
755+
/// Specialized optimization for u128. Instead of taking two items at a time, it splits
756+
/// into at most 2 u64s, and then chunks by 10e16, 10e8, 10e4, 10e2, and then 10e1.
757+
/// It also has to handle 1 last item, as 10^39 > 2^128 > 10^38, whereas
758+
/// 10^20 > 2^64 > 10^19.
759+
///
760+
/// IMPORTANT: `buf` length MUST BE at least 39.
761+
fn fmt_u128_inner(n: u128, buf: &mut [MaybeUninit<u8>]) -> usize {
762+
const MAX_DEC_N: usize = u128::MAX.ilog(10) as usize + 1;
763+
585764
// Optimize common-case zero, which would also need special treatment due to
586765
// its "leading" zero.
587766
if n == 0 {
588-
return f.pad_integral(true, "", "0");
767+
buf[MAX_DEC_N - 1].write(b'0');
768+
return MAX_DEC_N - 1;
589769
}
590770

591-
// U128::MAX has 39 significant-decimals.
592-
const MAX_DEC_N: usize = u128::MAX.ilog(10) as usize + 1;
593-
// Buffer decimals with right alignment.
594-
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
595-
596771
// Take the 16 least-significant decimals.
597772
let (quot_1e16, mod_1e16) = div_rem_1e16(n);
598773
let (mut remain, mut offset) = if quot_1e16 == 0 {
599774
(mod_1e16, MAX_DEC_N)
600775
} else {
601776
// Write digits at buf[23..39].
602-
enc_16lsd::<{ MAX_DEC_N - 16 }>(&mut buf, mod_1e16);
777+
enc_16lsd::<{ MAX_DEC_N - 16 }>(buf, mod_1e16);
603778

604779
// Take another 16 decimals.
605780
let (quot2, mod2) = div_rem_1e16(quot_1e16);
606781
if quot2 == 0 {
607782
(mod2, MAX_DEC_N - 16)
608783
} else {
609784
// Write digits at buf[7..23].
610-
enc_16lsd::<{ MAX_DEC_N - 32 }>(&mut buf, mod2);
785+
enc_16lsd::<{ MAX_DEC_N - 32 }>(buf, mod2);
611786
// Quot2 has at most 7 decimals remaining after two 1e16 divisions.
612787
(quot2 as u64, MAX_DEC_N - 32)
613788
}
@@ -666,22 +841,23 @@ fn fmt_u128(n: u128, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::R
666841
buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]);
667842
// not used: remain = 0;
668843
}
844+
offset
845+
}
669846

670-
// SAFETY: All buf content since offset is set.
671-
let written = unsafe { buf.get_unchecked(offset..) };
672-
// SAFETY: Writes use ASCII from the lookup table exclusively.
673-
let as_str = unsafe {
674-
str::from_utf8_unchecked(slice::from_raw_parts(
675-
MaybeUninit::slice_as_ptr(written),
676-
written.len(),
677-
))
678-
};
679-
f.pad_integral(is_nonnegative, "", as_str)
847+
fn fmt_u128(n: u128, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
848+
// `u128::MAX` is about 3.4 * 10^38 and so has 39 decimal digits: 39 bytes hold any `u128`.
849+
const MAX_DEC_N: usize = u128::MAX.ilog(10) as usize + 1;
850+
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
851+
852+
let offset = fmt_u128_inner(n, &mut buf);
853+
// SAFETY: Starting from `offset`, all elements of the slice have been set.
854+
let buf_slice = unsafe { slice_buffer_to_str(&buf, offset) };
855+
f.pad_integral(is_nonnegative, "", buf_slice)
680856
}
681857

682858
/// Encodes the 16 least-significant decimals of n into `buf[OFFSET .. OFFSET +
683859
/// 16 ]`.
684-
fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>; 39], n: u64) {
860+
fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) {
685861
// Consume the least-significant decimals from a working copy.
686862
let mut remain = n;
687863

0 commit comments

Comments
 (0)