Skip to content

Allow the global allocator to use thread-local storage and std::thread::current() #144465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions library/core/src/alloc/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,31 @@ use crate::{cmp, ptr};
/// Whether allocations happen or not is not part of the program behavior, even if it
/// could be detected via an allocator that tracks allocations by printing or otherwise
/// having side effects.
///
/// # Re-entrance
///
/// When implementing a global allocator one has to be careful not to create an infinitely recursive
/// implementation by accident, as many constructs in the Rust standard library may allocate in
/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using
/// it is highly problematic in a global allocator.
///
/// For this reason, one should generally stick to library features available through
/// [`core`] and avoid using [`std`] in a global allocator. A few features from [`std`] are
/// guaranteed not to use the global allocator:
///
/// - [`std::thread_local`],
/// - [`std::thread::current`],
/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and
/// [`Clone`] implementation.
///
/// [`std`]: ../../std/index.html
/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html
/// [`std::thread_local`]: ../../std/macro.thread_local.html
/// [`std::thread::current`]: ../../std/thread/fn.current.html
/// [`std::thread::park`]: ../../std/thread/fn.park.html
/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html
/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark

#[stable(feature = "global_alloc", since = "1.28.0")]
pub unsafe trait GlobalAlloc {
/// Allocates memory as described by the given `layout`.
Expand Down
9 changes: 8 additions & 1 deletion library/std/src/alloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//!
//! This attribute allows configuring the choice of global allocator.
//! You can use this to implement a completely custom global allocator
//! to route all default allocation requests to a custom object.
//! to route all[^system-alloc] default allocation requests to a custom object.
//!
//! ```rust
//! use std::alloc::{GlobalAlloc, System, Layout};
Expand Down Expand Up @@ -52,6 +52,13 @@
//!
//! The `#[global_allocator]` can only be used once in a crate
//! or its recursive dependencies.
//!
//! [^system-alloc]: Note that the Rust standard library internals may still
//! directly call [`System`] when necessary (for example, for the runtime
//! support typically required to implement a global allocator; see [re-entrance] on
//! [`GlobalAlloc`] for more details).
//!
//! [re-entrance]: trait.GlobalAlloc.html#re-entrance

#![deny(unsafe_op_in_unsafe_fn)]
#![stable(feature = "alloc_module", since = "1.28.0")]
Expand Down
15 changes: 5 additions & 10 deletions library/std/src/sys/thread_local/destructors/list.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
use crate::alloc::System;
use crate::cell::RefCell;
use crate::sys::thread_local::guard;

#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8)), System>> =
RefCell::new(Vec::new_in(System));

pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
let Ok(mut dtors) = DTORS.try_borrow_mut() else {
// This point can only be reached if the global allocator calls this
// function again.
// FIXME: maybe use the system allocator instead?
rtabort!("the global allocator may not use TLS with destructors");
};

let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") };
guard::enable();

dtors.push((t, dtor));
}

Expand All @@ -36,7 +31,7 @@ pub unsafe fn run() {
}
None => {
// Free the list memory.
*dtors = Vec::new();
*dtors = Vec::new_in(System);
break;
}
}
Expand Down
6 changes: 5 additions & 1 deletion library/std/src/sys/thread_local/key/xous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

use core::arch::asm;

use crate::alloc::System;
use crate::mem::ManuallyDrop;
use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory};
use crate::ptr;
Expand Down Expand Up @@ -151,7 +152,10 @@ struct Node {
}

unsafe fn register_dtor(key: Key, dtor: Dtor) {
let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
// We use the System allocator here to avoid interfering with a potential
// Global allocator using thread-local storage.
let mut node =
ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System));

#[allow(unused_unsafe)]
let mut head = unsafe { DTORS.load(Acquire) };
Expand Down
12 changes: 8 additions & 4 deletions library/std/src/sys/thread_local/os.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::key::{Key, LazyKey, get, set};
use super::{abort_on_dtor_unwind, guard};
use crate::alloc::System;
use crate::cell::Cell;
use crate::marker::PhantomData;
use crate::ptr;
Expand Down Expand Up @@ -95,8 +96,11 @@ impl<T: 'static> Storage<T> {
return ptr::null();
}

let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key });
let ptr = Box::into_raw(value);
// We use the System allocator here to avoid interfering with a potential
// Global allocator using thread-local storage.
let value =
Box::new_in(Value { value: i.and_then(Option::take).unwrap_or_else(f), key }, System);
let ptr = Box::into_raw_with_allocator(value).0;

// SAFETY:
// * key came from a `LazyKey` and is thus correct.
Expand All @@ -114,7 +118,7 @@ impl<T: 'static> Storage<T> {
// initializer has already returned and the next scope only starts
// after we return the pointer. Therefore, there can be no references
// to the old value.
drop(unsafe { Box::from_raw(old) });
drop(unsafe { Box::from_raw_in(old, System) });
}

// SAFETY: We just created this value above.
Expand All @@ -133,7 +137,7 @@ unsafe extern "C" fn destroy_value<T: 'static>(ptr: *mut u8) {
// Note that to prevent an infinite loop we reset it back to null right
// before we return from the destructor ourselves.
abort_on_dtor_unwind(|| {
let ptr = unsafe { Box::from_raw(ptr as *mut Value<T>) };
let ptr = unsafe { Box::from_raw_in(ptr as *mut Value<T>, System) };
let key = ptr.key;
// SAFETY: `key` is the TLS key `ptr` was stored under.
unsafe { set(key, ptr::without_provenance_mut(1)) };
Expand Down
19 changes: 10 additions & 9 deletions library/std/src/thread/current.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,16 @@ fn init_current(current: *mut ()) -> Thread {
// extra TLS write above shouldn't matter. The alternative is nearly always
// a stack overflow.

// If you came across this message, contact the author of your allocator.
// If you are said author: A surprising amount of functions inside the
// standard library (e.g. `Mutex`, `thread_local!`, `File` when using long
// paths, even `panic!` when using unwinding), need memory allocation, so
// you'll get circular dependencies all over the place when using them.
// I (joboet) highly recommend using only APIs from core in your allocator
// and implementing your own system abstractions. Still, if you feel that
// a particular API should be entirely allocation-free, feel free to open
// an issue on the Rust repository, we'll see what we can do.
// If you came across this message, contact the author of your
// allocator. If you are said author: A surprising amount of functions
// inside the standard library (e.g. `Mutex`, `File` when using long
// paths, even `panic!` when using unwinding), need memory allocation,
// so you'll get circular dependencies all over the place when using
// them. I (joboet) highly recommend using only APIs from core in your
// allocator and implementing your own system abstractions. Still, if
// you feel that a particular API should be entirely allocation-free,
// feel free to open an issue on the Rust repository, we'll see what we
// can do.
rtabort!(
"\n\
Attempted to access thread-local data while allocating said data.\n\
Expand Down
7 changes: 6 additions & 1 deletion library/std/src/thread/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ use crate::fmt;
/// [`with`]) within a thread, and values that implement [`Drop`] get
/// destructed when a thread exits. Some platform-specific caveats apply, which
/// are explained below.
/// Note that, should the destructor panics, the whole process will be [aborted].
/// Note that, should the destructor panic, the whole process will be [aborted].
/// On platforms where initialization requires memory allocation, this is
/// performed directly through [`System`], allowing the [global allocator]
/// to make use of thread local storage.
///
/// A `LocalKey`'s initializer cannot recursively depend on itself. Using a
/// `LocalKey` in this way may cause panics, aborts or infinite recursion on
/// the first call to `with`.
///
/// [`System`]: crate::alloc::System
/// [global allocator]: crate::alloc
/// [aborted]: crate::process::abort
///
/// # Single-thread Synchronization
Expand Down
62 changes: 46 additions & 16 deletions library/std/src/thread/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
#[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))]
mod tests;

use crate::alloc::System;
use crate::any::Any;
use crate::cell::UnsafeCell;
use crate::ffi::CStr;
Expand Down Expand Up @@ -1230,21 +1231,45 @@ impl ThreadId {
}
}
} else {
use crate::sync::{Mutex, PoisonError};

static COUNTER: Mutex<u64> = Mutex::new(0);
use crate::cell::SyncUnsafeCell;
use crate::hint::spin_loop;
use crate::sync::atomic::{Atomic, AtomicBool};
use crate::thread::yield_now;

// If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex
// here as we might be trying to get the current thread id in the global allocator,
// and on some platforms Mutex requires allocation.
static COUNTER_LOCKED: Atomic<bool> = AtomicBool::new(false);
static COUNTER: SyncUnsafeCell<u64> = SyncUnsafeCell::new(0);

// Acquire lock.
let mut spin = 0;
while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() {
if spin <= 3 {
for _ in 0..(1 << spin) {
spin_loop();
}
} else {
yield_now();
}
spin += 1;
}

let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner);
let Some(id) = counter.checked_add(1) else {
// in case the panic handler ends up calling `ThreadId::new()`,
// avoid reentrant lock acquire.
drop(counter);
exhausted();
let id;
// SAFETY: we have an exclusive lock on the counter.
unsafe {
id = (*COUNTER.get()).saturating_add(1);
(*COUNTER.get()) = id;
};

*counter = id;
drop(counter);
ThreadId(NonZero::new(id).unwrap())
// Release the lock.
COUNTER_LOCKED.store(false, Ordering::Release);

if id == u64::MAX {
exhausted()
} else {
ThreadId(NonZero::new(id).unwrap())
}
}
}
}
Expand Down Expand Up @@ -1432,7 +1457,10 @@ impl Inner {
///
/// [`thread::current`]: current::current
pub struct Thread {
inner: Pin<Arc<Inner>>,
// We use the System allocator such that creating or dropping this handle
// does not interfere with a potential Global allocator using thread-local
// storage.
inner: Pin<Arc<Inner, System>>,
}

impl Thread {
Expand All @@ -1445,7 +1473,7 @@ impl Thread {
// SAFETY: We pin the Arc immediately after creation, so its address never
// changes.
let inner = unsafe {
let mut arc = Arc::<Inner>::new_uninit();
let mut arc = Arc::<Inner, _>::new_uninit_in(System);
let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr();
(&raw mut (*ptr).name).write(name);
(&raw mut (*ptr).id).write(id);
Expand Down Expand Up @@ -1602,7 +1630,7 @@ impl Thread {
pub fn into_raw(self) -> *const () {
// Safety: We only expose an opaque pointer, which maintains the `Pin` invariant.
let inner = unsafe { Pin::into_inner_unchecked(self.inner) };
Arc::into_raw(inner) as *const ()
Arc::into_raw_with_allocator(inner).0 as *const ()
}

/// Constructs a `Thread` from a raw pointer.
Expand All @@ -1624,7 +1652,9 @@ impl Thread {
#[unstable(feature = "thread_raw", issue = "97523")]
pub unsafe fn from_raw(ptr: *const ()) -> Thread {
// Safety: Upheld by caller.
unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } }
unsafe {
Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) }
}
}

fn cname(&self) -> Option<&CStr> {
Expand Down
64 changes: 64 additions & 0 deletions tests/ui/threads-sendsync/tls-in-global-alloc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//@ run-pass
//@ needs-threads

use std::alloc::{GlobalAlloc, Layout, System};
use std::thread::Thread;
use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering};

// Bit flags recording the TLS value's lifecycle: bit 0 is set when the
// `LOCAL` initializer runs, bit 1 when its destructor runs (see the
// `Drop` impl for `Local` below). Checked at the end of `main`.
static GLOBAL: AtomicUsize = AtomicUsize::new(0);

// Wraps the current thread's handle, captured from *inside* a TLS
// initializer that may itself run within the global allocator.
struct Local(Thread);

thread_local! {
    static LOCAL: Local = {
        // Record that the thread-local initializer ran.
        GLOBAL.fetch_or(1, Ordering::Relaxed);
        // `std::thread::current()` is exercised here on purpose: it must be
        // usable from code reachable from the global allocator.
        Local(std::thread::current())
    };
}

impl Drop for Local {
    fn drop(&mut self) {
        // Record that the thread-local destructor ran; `main` asserts this
        // bit is set after the spawned thread has been joined.
        GLOBAL.fetch_or(2, Ordering::Relaxed);
    }
}

// Re-entrancy guard: set while `Alloc::alloc`/`dealloc` is in progress, so a
// nested call back into the allocator trips the `assert!` at its entry.
static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false);

// Install the TLS-using allocator below as the program's global allocator.
#[global_allocator]
static ALLOC: Alloc = Alloc;
struct Alloc;

// A global allocator that deliberately touches thread-local storage and
// `std::thread::current()` on every allocation, verifying that the standard
// library's TLS/thread machinery never calls back into the global allocator.
unsafe impl GlobalAlloc for Alloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // Make sure we aren't re-entrant.
        // NOTE(review): the guard is a single process-wide flag read/written
        // with `Relaxed`, so it assumes allocations from different threads do
        // not overlap while it is set — TODO confirm the test's threading
        // keeps allocator calls effectively serialized.
        assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
        // Access TLS (may run the `LOCAL` initializer) and the current-thread
        // handle from inside the allocator — the behavior under test.
        LOCAL.with(|local| {
            assert!(local.0.id() == std::thread::current().id());
        });
        // Delegate the actual allocation to the system allocator.
        let ret = unsafe { System.alloc(layout) };
        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
        ret
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // Make sure we aren't re-entrant (same guard as `alloc` above).
        assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
        // TLS must also be usable on the deallocation path.
        LOCAL.with(|local| {
            assert!(local.0.id() == std::thread::current().id());
        });
        unsafe { System.dealloc(ptr, layout) }
        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
    }
}

// Drive the allocator from a spawned thread so that the thread-local's
// destructor fires when that thread exits, then check both lifecycle bits.
fn main() {
    // Allocate on a separate thread; the TLS destructor runs at thread exit.
    let worker = std::thread::spawn(|| {
        // Force a heap allocation, which goes through `Alloc` and thereby
        // initializes the `LOCAL` thread-local.
        std::hint::black_box(vec![1, 2]);
        // Initializer bit must be set; destructor bit must still be clear.
        assert!(GLOBAL.load(Ordering::Relaxed) == 1);
    });
    worker.join().unwrap();
    // After the thread exited, both the init and drop bits must be set.
    assert!(GLOBAL.load(Ordering::Relaxed) == 3);
}
Loading