From 6760cd2859db68d47d62089d9805d860fb37d518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Sun, 13 Jul 2025 22:33:49 +0200 Subject: [PATCH] core: make `str::split_at_unchecked()` inline This PR adds `#[inline]` to the method `str::split_at_unchecked()`. This is done for two reasons: 1. The method is tiny, e.g. on AMD-64 (): ```asm movq %rdi, %rax subq %rcx, %rdx movq %rsi, (%rdi) addq %rcx, %rsi movq %rcx, 8(%rdi) movq %rsi, 16(%rdi) movq %rdx, 24(%rdi) retq ``` 2. More importantly, inlining the method enables further automatic optimizations. E.g. if you split at index 3, then in the compiler (rustc, llvm or both) knows that this code cannot fail, and the panicking path is omitted in the generated code: ```rust pub fn punctuation(i: &str) -> Result<(), ()> { const THREE_CHARS: &[[u8; 3]] = &[*b"<<=", *b">>=", *b"...", *b"..="]; if let Some((head, _)) = i.split_at_checked(3) && THREE_CHARS.contains(&head.as_bytes().try_into().unwrap()) { Ok(()) } else { Err(()) } } ```
Without PR ```asm playground::punctuation: subq $40, %rsp movq %rsi, %rdx movq %rdi, %rsi movb $1, %al cmpq $3, %rdx ja .LBB2_2 je .LBB2_3 .LBB2_11: addq $40, %rsp retq .LBB2_2: cmpb $-64, 3(%rsi) jl .LBB2_11 .LBB2_3: leaq 8(%rsp), %rdi movl $3, %ecx callq *core::str::::split_at_unchecked@GOTPCREL(%rip) movq 8(%rsp), %rcx movb $1, %al testq %rcx, %rcx je .LBB2_11 cmpq $3, 16(%rsp) jne .LBB2_12 movzwl (%rcx), %edx movzbl 2(%rcx), %ecx shll $16, %ecx orl %edx, %ecx cmpl $4013115, %ecx jg .LBB2_8 cmpl $3026478, %ecx je .LBB2_10 cmpl $4009518, %ecx je .LBB2_10 jmp .LBB2_11 .LBB2_8: cmpl $4013630, %ecx je .LBB2_10 cmpl $4013116, %ecx jne .LBB2_11 .LBB2_10: xorl %eax, %eax addq $40, %rsp retq .LBB2_12: leaq .Lanon.d98a7fbb86d10a97c24516e267466134.2(%rip), %rdi leaq .Lanon.d98a7fbb86d10a97c24516e267466134.1(%rip), %rcx leaq .Lanon.d98a7fbb86d10a97c24516e267466134.6(%rip), %r8 leaq 7(%rsp), %rdx movl $43, %esi callq *core::result::unwrap_failed@GOTPCREL(%rip) ```
With PR ```asm playground::punctuation: movb $1, %al cmpq $3, %rsi ja .LBB0_2 je .LBB0_3 .LBB0_9: retq .LBB0_2: cmpb $-64, 3(%rdi) jl .LBB0_9 .LBB0_3: movzwl (%rdi), %eax movzbl 2(%rdi), %ecx shll $16, %ecx orl %eax, %ecx movb $1, %al cmpl $4013115, %ecx jg .LBB0_6 cmpl $3026478, %ecx je .LBB0_8 cmpl $4009518, %ecx je .LBB0_8 jmp .LBB0_9 .LBB0_6: cmpl $4013630, %ecx je .LBB0_8 cmpl $4013116, %ecx jne .LBB0_9 .LBB0_8: xorl %eax, %eax retq ```
--- library/core/src/str/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 32a2298817538..b1ce643e87cd8 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -952,6 +952,7 @@ impl str { /// /// The caller must ensure that `mid` is a valid byte offset from the start /// of the string and falls on the boundary of a UTF-8 code point. + #[inline] const unsafe fn split_at_unchecked(&self, mid: usize) -> (&str, &str) { let len = self.len(); let ptr = self.as_ptr();