Skip to content

Commit e36b844

Browse files
authored
Rollup merge of #144472 - okaneco:char_bound, r=Mark-Simulacrum
str: Mark unstable `round_char_boundary` feature functions as const

- Mark `floor_char_boundary` and `ceil_char_boundary` as const.
- Simplify the implementations, reducing the number of arithmetic operations.

It seems unnecessary to do the lower/upper bounds calculations and extra slicing when we can jump straight to inspecting the bytes, assuming the underlying data is valid UTF-8.

Tracking issue: #93743
2 parents 21120e2 + 7f7d343 commit e36b844

File tree

1 file changed

+26
-14
lines changed

1 file changed

+26
-14
lines changed

library/core/src/str/mod.rs

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -407,17 +407,22 @@ impl str {
407407
/// ```
408408
#[unstable(feature = "round_char_boundary", issue = "93743")]
409409
#[inline]
410-
pub fn floor_char_boundary(&self, index: usize) -> usize {
410+
pub const fn floor_char_boundary(&self, index: usize) -> usize {
411411
if index >= self.len() {
412412
self.len()
413413
} else {
414-
let lower_bound = index.saturating_sub(3);
415-
let new_index = self.as_bytes()[lower_bound..=index]
416-
.iter()
417-
.rposition(|b| b.is_utf8_char_boundary());
418-
419-
// SAFETY: we know that the character boundary will be within four bytes
420-
unsafe { lower_bound + new_index.unwrap_unchecked() }
414+
let mut i = index;
415+
while i > 0 {
416+
if self.as_bytes()[i].is_utf8_char_boundary() {
417+
break;
418+
}
419+
i -= 1;
420+
}
421+
422+
// The character boundary will be within four bytes of the index
423+
debug_assert!(i >= index.saturating_sub(3));
424+
425+
i
421426
}
422427
}
423428

@@ -445,15 +450,22 @@ impl str {
445450
/// ```
446451
#[unstable(feature = "round_char_boundary", issue = "93743")]
447452
#[inline]
448-
pub fn ceil_char_boundary(&self, index: usize) -> usize {
453+
pub const fn ceil_char_boundary(&self, index: usize) -> usize {
449454
if index >= self.len() {
450455
self.len()
451456
} else {
452-
let upper_bound = Ord::min(index + 4, self.len());
453-
self.as_bytes()[index..upper_bound]
454-
.iter()
455-
.position(|b| b.is_utf8_char_boundary())
456-
.map_or(upper_bound, |pos| pos + index)
457+
let mut i = index;
458+
while i < self.len() {
459+
if self.as_bytes()[i].is_utf8_char_boundary() {
460+
break;
461+
}
462+
i += 1;
463+
}
464+
465+
// The character boundary will be within four bytes of the index
466+
debug_assert!(i <= index + 3);
467+
468+
i
457469
}
458470
}
459471

0 commit comments

Comments
 (0)