Skip to content

Commit 1a8eaa8

Browse files
committed
Auto merge of #144287 - nnethercote:Symbol-with_interner, r=<try>
Introduce `Symbol::with_interner`. It lets you get the contents of multiple symbols with a single TLS lookup and interner lock, instead of one per symbol. r? `@ghost`
2 parents 9748d87 + 8a5bcdd commit 1a8eaa8

File tree

7 files changed

+111
-67
lines changed

7 files changed

+111
-67
lines changed

compiler/rustc_ast/src/ast.rs

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -167,25 +167,27 @@ impl Path {
167167
///
168168
/// Panics if `path` is empty or a segment after the first is `kw::PathRoot`.
169169
pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String {
170-
// This is a guess at the needed capacity that works well in practice. It is slightly faster
171-
// than (a) starting with an empty string, or (b) computing the exact capacity required.
172-
// `8` works well because it's about the right size and jemalloc's size classes are all
173-
// multiples of 8.
174-
let mut iter = path.into_iter();
175-
let len_hint = iter.size_hint().1.unwrap_or(1);
176-
let mut s = String::with_capacity(len_hint * 8);
177-
178-
let first_sym = *iter.next().unwrap().borrow();
179-
if first_sym != kw::PathRoot {
180-
s.push_str(first_sym.as_str());
181-
}
182-
for sym in iter {
183-
let sym = *sym.borrow();
184-
debug_assert_ne!(sym, kw::PathRoot);
185-
s.push_str("::");
186-
s.push_str(sym.as_str());
187-
}
188-
s
170+
Symbol::with_interner(|interner| {
171+
// This is a guess at the needed capacity that works well in practice. It is slightly
172+
// faster than (a) starting with an empty string, or (b) computing the exact capacity
173+
// required. `8` works well because it's about the right size and jemalloc's size classes
174+
// are all multiples of 8.
175+
let mut iter = path.into_iter();
176+
let len_hint = iter.size_hint().1.unwrap_or(1);
177+
178+
let mut s = String::with_capacity(len_hint * 8);
179+
let first_sym = *iter.next().unwrap().borrow();
180+
if first_sym != kw::PathRoot {
181+
s.push_str(interner.get_str(first_sym));
182+
}
183+
for sym in iter {
184+
let sym = *sym.borrow();
185+
debug_assert_ne!(sym, kw::PathRoot);
186+
s.push_str("::");
187+
s.push_str(interner.get_str(sym));
188+
}
189+
s
190+
})
189191
}
190192

191193
/// Like `join_path_syms`, but for `Ident`s. This function is necessary because

compiler/rustc_macros/src/symbols.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
288288
const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
289289

290290
/// The number of predefined symbols; this is the first index for
291-
/// extra pre-interned symbols in an Interner created via
291+
/// extra pre-interned symbols in an interner created via
292292
/// [`Interner::with_extra_symbols`].
293293
pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;
294294

compiler/rustc_resolve/src/rustdoc.rs

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
141141
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
142142
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
143143
// (5 - 1) whitespaces.
144-
let Some(min_indent) = docs
145-
.iter()
146-
.map(|fragment| {
147-
fragment
148-
.doc
149-
.as_str()
150-
.lines()
151-
.filter(|line| line.chars().any(|c| !c.is_whitespace()))
152-
.map(|line| {
153-
// Compare against either space or tab, ignoring whether they are
154-
// mixed or not.
155-
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
156-
whitespace
157-
+ (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
144+
let Some(min_indent) = ({
145+
Symbol::with_interner(|interner| {
146+
docs.iter()
147+
.map(|fragment| {
148+
interner
149+
.get_str(fragment.doc)
150+
.lines()
151+
.filter(|line| line.chars().any(|c| !c.is_whitespace()))
152+
.map(|line| {
153+
// Compare against either space or tab, ignoring whether they are
154+
// mixed or not.
155+
let whitespace =
156+
line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
157+
whitespace
158+
+ (if fragment.kind == DocFragmentKind::SugaredDoc {
159+
0
160+
} else {
161+
add
162+
})
163+
})
164+
.min()
165+
.unwrap_or(usize::MAX)
158166
})
159167
.min()
160-
.unwrap_or(usize::MAX)
161168
})
162-
.min()
163-
else {
169+
}) else {
164170
return;
165171
};
166172

compiler/rustc_span/src/symbol.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2632,6 +2632,19 @@ impl Symbol {
26322632
})
26332633
}
26342634

2635+
/// Runs `f` with access to the symbol interner, so you can call
2636+
/// `interner.get_str(sym)` instead of `sym.as_str()`.
2637+
///
2638+
/// This is for performance: it lets you get the contents of multiple
2639+
/// symbols with a single TLS lookup and interner lock operation, instead
2640+
/// of doing those operations once per symbol.
2641+
pub fn with_interner<R>(f: impl FnOnce(&InternerInner) -> R) -> R {
2642+
with_session_globals(|session_globals| {
2643+
let inner = session_globals.symbol_interner.0.lock();
2644+
f(&inner)
2645+
})
2646+
}
2647+
26352648
pub fn as_u32(self) -> u32 {
26362649
self.0.as_u32()
26372650
}
@@ -2733,14 +2746,13 @@ impl<CTX> HashStable<CTX> for ByteSymbol {
27332746
// string with identical contents (e.g. "foo" and b"foo") are both interned,
27342747
// only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
27352748
// will have the same index.
2749+
//
2750+
// There must only be one of these, otherwise its easy to mix up symbols
2751+
// between interners.
27362752
pub(crate) struct Interner(Lock<InternerInner>);
27372753

27382754
// The `&'static [u8]`s in this type actually point into the arena.
2739-
//
2740-
// This type is private to prevent accidentally constructing more than one
2741-
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
2742-
// between `Interner`s.
2743-
struct InternerInner {
2755+
pub struct InternerInner {
27442756
arena: DroplessArena,
27452757
byte_strs: FxIndexSet<&'static [u8]>,
27462758
}
@@ -2794,21 +2806,37 @@ impl Interner {
27942806
/// Get the symbol as a string.
27952807
///
27962808
/// [`Symbol::as_str()`] should be used in preference to this function.
2809+
/// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]).
27972810
fn get_str(&self, symbol: Symbol) -> &str {
2798-
let byte_str = self.get_inner(symbol.0.as_usize());
2811+
let inner = self.0.lock();
2812+
let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap();
27992813
// SAFETY: known to be a UTF8 string because it's a `Symbol`.
28002814
unsafe { str::from_utf8_unchecked(byte_str) }
28012815
}
28022816

28032817
/// Get the symbol as a string.
28042818
///
28052819
/// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
2820+
/// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]).
28062821
fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
2807-
self.get_inner(symbol.0.as_usize())
2822+
let inner = self.0.lock();
2823+
inner.byte_strs.get_index(symbol.0.as_usize()).unwrap()
28082824
}
2825+
}
28092826

2810-
fn get_inner(&self, index: usize) -> &[u8] {
2811-
self.0.lock().byte_strs.get_index(index).unwrap()
2827+
impl InternerInner {
2828+
/// Get the symbol as a string. Used with `with_interner`.
2829+
#[inline]
2830+
pub fn get_str(&self, symbol: Symbol) -> &str {
2831+
let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap();
2832+
// SAFETY: known to be a UTF8 string because it's a `Symbol`.
2833+
unsafe { str::from_utf8_unchecked(byte_str) }
2834+
}
2835+
2836+
/// Get the symbol as a string. Used with `with_interner`.
2837+
#[inline]
2838+
pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
2839+
self.byte_strs.get_index(symbol.0.as_usize()).unwrap()
28122840
}
28132841
}
28142842

src/librustdoc/html/render/print_item.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use rustc_hir::def_id::DefId;
1212
use rustc_index::IndexVec;
1313
use rustc_middle::ty::{self, TyCtxt};
1414
use rustc_span::hygiene::MacroKind;
15-
use rustc_span::symbol::{Symbol, sym};
15+
use rustc_span::symbol::{InternerInner, Symbol, sym};
1616
use tracing::{debug, info};
1717

1818
use super::type_layout::document_type_layout;
@@ -333,7 +333,12 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
333333
}
334334
}
335335

336-
fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering {
336+
fn cmp(
337+
i1: &clean::Item,
338+
i2: &clean::Item,
339+
interner: &InternerInner,
340+
tcx: TyCtxt<'_>,
341+
) -> Ordering {
337342
let rty1 = reorder(i1.type_());
338343
let rty2 = reorder(i2.type_());
339344
if rty1 != rty2 {
@@ -349,7 +354,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
349354
return is_stable2.cmp(&is_stable1);
350355
}
351356
match (i1.name, i2.name) {
352-
(Some(name1), Some(name2)) => compare_names(name1.as_str(), name2.as_str()),
357+
(Some(name1), Some(name2)) => {
358+
compare_names(interner.get_str(name1), interner.get_str(name2))
359+
}
353360
(Some(_), None) => Ordering::Greater,
354361
(None, Some(_)) => Ordering::Less,
355362
(None, None) => Ordering::Equal,
@@ -360,7 +367,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
360367

361368
match cx.shared.module_sorting {
362369
ModuleSorting::Alphabetical => {
363-
not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, tcx));
370+
Symbol::with_interner(|interner| {
371+
not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, interner, tcx));
372+
});
364373
}
365374
ModuleSorting::DeclarationOrder => {}
366375
}

src/librustdoc/html/render/search_index.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,16 @@ pub(crate) fn build_index(
105105
let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();
106106

107107
// Sort search index items. This improves the compressibility of the search index.
108-
cache.search_index.sort_unstable_by(|k1, k2| {
109-
// `sort_unstable_by_key` produces lifetime errors
110-
// HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go away, too
111-
let k1 = (&k1.path, k1.name.as_str(), &k1.ty, k1.parent.map(|id| (id.index, id.krate)));
112-
let k2 = (&k2.path, k2.name.as_str(), &k2.ty, k2.parent.map(|id| (id.index, id.krate)));
113-
Ord::cmp(&k1, &k2)
108+
Symbol::with_interner(|inner| {
109+
cache.search_index.sort_unstable_by(|k1, k2| {
110+
// `sort_unstable_by_key` produces lifetime errors
111+
// HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go
112+
// away, too
113+
let pair = |k: &IndexItem| k.parent.map(|id| (id.index, id.krate));
114+
let k1 = (&k1.path, inner.get_str(k1.name), &k1.ty, pair(k1));
115+
let k2 = (&k2.path, inner.get_str(k2.name), &k2.ty, pair(k2));
116+
Ord::cmp(&k1, &k2)
117+
});
114118
});
115119

116120
// Set up alias indexes.

src/librustdoc/html/url_parts_builder.rs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,22 +146,17 @@ impl<'a> Extend<&'a str> for UrlPartsBuilder {
146146

147147
impl FromIterator<Symbol> for UrlPartsBuilder {
148148
fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
149-
// This code has to be duplicated from the `&str` impl because of
150-
// `Symbol::as_str`'s lifetimes.
151-
let iter = iter.into_iter();
152-
let mut builder = Self::with_capacity_bytes(AVG_PART_LENGTH * iter.size_hint().0);
153-
iter.for_each(|part| builder.push(part.as_str()));
154-
builder
149+
Symbol::with_interner(|interner| {
150+
UrlPartsBuilder::from_iter(iter.into_iter().map(|sym| interner.get_str(sym)))
151+
})
155152
}
156153
}
157154

158155
impl Extend<Symbol> for UrlPartsBuilder {
159156
fn extend<T: IntoIterator<Item = Symbol>>(&mut self, iter: T) {
160-
// This code has to be duplicated from the `&str` impl because of
161-
// `Symbol::as_str`'s lifetimes.
162-
let iter = iter.into_iter();
163-
self.buf.reserve(AVG_PART_LENGTH * iter.size_hint().0);
164-
iter.for_each(|part| self.push(part.as_str()));
157+
Symbol::with_interner(|interner| {
158+
self.extend(iter.into_iter().map(|sym| interner.get_str(sym)))
159+
})
165160
}
166161
}
167162

0 commit comments

Comments
 (0)