From e881ad1cd271383d17b494b0ab6a1facf7c0d71e Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Sat, 1 Nov 2025 08:15:13 -0700 Subject: [PATCH 1/5] Added Windows bindings for FLS-related functions --- library/std/src/sys/pal/windows/c/bindings.txt | 6 ++++++ library/std/src/sys/pal/windows/c/windows_sys.rs | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/library/std/src/sys/pal/windows/c/bindings.txt b/library/std/src/sys/pal/windows/c/bindings.txt index c21d1de81341f..ed12474ccaa4f 100644 --- a/library/std/src/sys/pal/windows/c/bindings.txt +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -2130,6 +2130,11 @@ FindExSearchNameMatch FindFirstFileExW FindNextFileW FIONBIO +FLS_OUT_OF_INDEXES +FlsAlloc +FlsFree +FlsGetValue +FlsSetValue FlushFileBuffers FORMAT_MESSAGE_ALLOCATE_BUFFER FORMAT_MESSAGE_ARGUMENT_ARRAY @@ -2318,6 +2323,7 @@ OPEN_ALWAYS OPEN_EXISTING OpenProcessToken OVERLAPPED +PFLS_CALLBACK_FUNCTION PIPE_ACCEPT_REMOTE_CLIENTS PIPE_ACCESS_DUPLEX PIPE_ACCESS_INBOUND diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index eb54efd1c1fe0..ce17a922ea715 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -27,6 +27,10 @@ windows_link::link!("kernel32.dll" "system" fn ExitProcess(uexitcode : u32) -> ! windows_link::link!("kernel32.dll" "system" fn FindClose(hfindfile : HANDLE) -> BOOL); windows_link::link!("kernel32.dll" "system" fn FindFirstFileExW(lpfilename : PCWSTR, finfolevelid : FINDEX_INFO_LEVELS, lpfindfiledata : *mut core::ffi::c_void, fsearchop : FINDEX_SEARCH_OPS, lpsearchfilter : *const core::ffi::c_void, dwadditionalflags : FIND_FIRST_EX_FLAGS) -> HANDLE); windows_link::link!("kernel32.dll" "system" fn FindNextFileW(hfindfile : HANDLE, lpfindfiledata : *mut WIN32_FIND_DATAW) -> BOOL); +windows_link::link!("kernel32.dll" "system" fn FlsAlloc(lpcallback : PFLS_CALLBACK_FUNCTION) -> u32); +windows_link::link!("kernel32.dll" "system" fn FlsFree(dwflsindex : u32) -> BOOL); +windows_link::link!("kernel32.dll" "system" fn FlsGetValue(dwflsindex : u32) -> *mut core::ffi::c_void); +windows_link::link!("kernel32.dll" "system" fn FlsSetValue(dwflsindex : u32, lpflsdata : *const core::ffi::c_void) -> BOOL); windows_link::link!("kernel32.dll" "system" fn FlushFileBuffers(hfile : HANDLE) -> BOOL); windows_link::link!("kernel32.dll" "system" fn FormatMessageW(dwflags : FORMAT_MESSAGE_OPTIONS, lpsource : *const core::ffi::c_void, dwmessageid : u32, dwlanguageid : u32, lpbuffer : PWSTR, nsize : u32, arguments : *const *const i8) -> u32); windows_link::link!("kernel32.dll" "system" fn FreeEnvironmentStringsW(penv : PCWSTR) -> BOOL); @@ -2667,6 +2671,7 @@ impl Default for FLOATING_SAVE_AREA { unsafe { core::mem::zeroed() } } } +pub const FLS_OUT_OF_INDEXES: u32 = 4294967295u32; pub const FORMAT_MESSAGE_ALLOCATE_BUFFER: FORMAT_MESSAGE_OPTIONS = 256u32; pub const FORMAT_MESSAGE_ARGUMENT_ARRAY: FORMAT_MESSAGE_OPTIONS = 8192u32; pub const FORMAT_MESSAGE_FROM_HMODULE: FORMAT_MESSAGE_OPTIONS = 2048u32; @@ -3037,6 +3042,8 @@ pub struct OVERLAPPED_0_0 { } pub type PCSTR = *const u8; pub type PCWSTR = *const u16; +pub type PFLS_CALLBACK_FUNCTION = + Option; pub type PIO_APC_ROUTINE = Option< unsafe extern "system" fn( apccontext: *mut core::ffi::c_void, From afa69573f756c15a62c9e3414d66cbc0af31c5ca Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Sat, 1 Nov 2025 08:22:22 -0700 Subject: [PATCH 2/5] Replace `thread_local::guard` on Windows with an FLS-based impl --- .../std/src/sys/pal/windows/c/bindings.txt | 1 + .../std/src/sys/pal/windows/c/windows_sys.rs | 1 + .../std/src/sys/thread_local/guard/windows.rs | 168 +++++++++--------- library/std/src/thread/local.rs | 14 +- library/std/tests/thread_local/tests.rs | 55 ++++++ 5 files changed, 143 insertions(+), 96 deletions(-) diff --git a/library/std/src/sys/pal/windows/c/bindings.txt b/library/std/src/sys/pal/windows/c/bindings.txt index ed12474ccaa4f..9c5e9424b22ec 100644 --- a/library/std/src/sys/pal/windows/c/bindings.txt +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -2263,6 +2263,7 @@ IPV6_DROP_MEMBERSHIP IPV6_MREQ IPV6_MULTICAST_LOOP IPV6_V6ONLY +IsThreadAFiber LINGER listen LocalFree diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index ce17a922ea715..668c2b50b7b87 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -71,6 +71,7 @@ windows_link::link!("kernel32.dll" "system" fn GetWindowsDirectoryW(lpbuffer : P windows_link::link!("kernel32.dll" "system" fn InitOnceBeginInitialize(lpinitonce : *mut INIT_ONCE, dwflags : u32, fpending : *mut BOOL, lpcontext : *mut *mut core::ffi::c_void) -> BOOL); windows_link::link!("kernel32.dll" "system" fn InitOnceComplete(lpinitonce : *mut INIT_ONCE, dwflags : u32, lpcontext : *const core::ffi::c_void) -> BOOL); windows_link::link!("kernel32.dll" "system" fn InitializeProcThreadAttributeList(lpattributelist : LPPROC_THREAD_ATTRIBUTE_LIST, dwattributecount : u32, dwflags : u32, lpsize : *mut usize) -> BOOL); +windows_link::link!("kernel32.dll" "system" fn IsThreadAFiber() -> BOOL); windows_link::link!("kernel32.dll" "system" fn LocalFree(hmem : HLOCAL) -> HLOCAL); windows_link::link!("kernel32.dll" "system" fn LockFileEx(hfile : HANDLE, dwflags : LOCK_FILE_FLAGS, dwreserved : u32, nnumberofbytestolocklow : u32, nnumberofbytestolockhigh : u32, lpoverlapped : *mut OVERLAPPED) -> BOOL); windows_link::link!("kernel32.dll" "system" fn MoveFileExW(lpexistingfilename : PCWSTR, lpnewfilename : PCWSTR, dwflags : MOVE_FILE_FLAGS) -> BOOL); diff --git a/library/std/src/sys/thread_local/guard/windows.rs b/library/std/src/sys/thread_local/guard/windows.rs index f747129465d6d..7e56c8b7324dd 100644 --- a/library/std/src/sys/thread_local/guard/windows.rs +++ b/library/std/src/sys/thread_local/guard/windows.rs @@ -1,103 +1,99 @@ //! Support for Windows TLS destructors. //! -//! Unfortunately, Windows does not provide a nice API to provide a destructor -//! for a TLS variable. Thus, the solution here ended up being a little more -//! obscure, but fear not, the internet has informed me [1][2] that this solution -//! is not unique (no way I could have thought of it as well!). The key idea is -//! to insert some hook somewhere to run arbitrary code on thread termination. -//! With this in place we'll be able to run anything we like, including all -//! TLS destructors! +//! Windows has an API to provide a destructor for a FLS (fiber local storage) variable, +//! which behaves similarly to a TLS variable for our purpose [1]. //! -//! In order to realize this, all TLS destructors are tracked by *us*, not the -//! Windows runtime. This means that we have a global list of destructors for +//! All TLS destructors are tracked by *us*, not the Windows runtime. +//! This means that we have a global list of destructors for //! each TLS key or variable that we know about. //! -//! # What's up with CRT$XLB? -//! -//! For anything about TLS destructors to work on Windows, we have to be able -//! to run *something* when a thread exits. To do so, we place a very special -//! static in a very special location. If this is encoded in just the right -//! way, the kernel's loader is apparently nice enough to run some function -//! of ours whenever a thread exits! How nice of the kernel! -//! -//! Lots of detailed information can be found in source [1] above, but the -//! gist of it is that this is leveraging a feature of Microsoft's PE format -//! (executable format) which is not actually used by any compilers today. -//! This apparently translates to any callbacks in the ".CRT$XLB" section -//! being run on certain events. -//! -//! So after all that, we use the compiler's `#[link_section]` feature to place -//! a callback pointer into the magic section so it ends up being called. -//! -//! # What's up with this callback? -//! -//! The callback specified receives a number of parameters from... someone! -//! (the kernel? the runtime? I'm not quite sure!) There are a few events that -//! this gets invoked for, but we're currently only interested on when a -//! thread or a process "detaches" (exits). The process part happens for the -//! last thread and the thread part happens for any normal thread. -//! -//! # The article mentions weird stuff about "/INCLUDE"? -//! -//! It sure does! Specifically we're talking about this quote: -//! -//! ```quote -//! The Microsoft run-time library facilitates this process by defining a -//! memory image of the TLS Directory and giving it the special name -//! “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The -//! linker looks for this memory image and uses the data there to create the -//! TLS Directory. Other compilers that support TLS and work with the -//! Microsoft linker must use this same technique. -//! ``` -//! -//! Basically what this means is that if we want support for our TLS -//! destructors/our hook being called then we need to make sure the linker does -//! not omit this symbol. Otherwise it will omit it and our callback won't be -//! wired up. -//! -//! We don't actually use the `/INCLUDE` linker flag here like the article -//! mentions because the Rust compiler doesn't propagate linker flags, but -//! instead we use a shim function which performs a volatile 1-byte load from -//! the address of the _tls_used symbol to ensure it sticks around. -//! -//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way -//! [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 +//! [1]: https://devblogs.microsoft.com/oldnewthing/20191011-00/?p=102989 use core::ffi::c_void; +use core::sync::atomic::{AtomicU32, Ordering}; +use crate::cell::Cell; use crate::ptr; -use crate::sys::c; +use crate::sys::c::{self, FLS_OUT_OF_INDEXES}; + +pub type Key = u32; -unsafe extern "C" { - #[link_name = "_tls_used"] - static TLS_USED: u8; +unsafe fn create(dtor: c::PFLS_CALLBACK_FUNCTION) -> Key { + let key_result = unsafe { c::FlsAlloc(dtor) }; + + if key_result == c::FLS_OUT_OF_INDEXES { + rtabort!("out of FLS keys"); + } + + key_result } -pub fn enable() { - // When destructors are used, we need to add a reference to the _tls_used - // symbol provided by the CRT, otherwise the TLS support code will get - // GC'd by the linker and our callback won't be called. - unsafe { ptr::from_ref(&TLS_USED).read_volatile() }; - // We also need to reference CALLBACK to make sure it does not get GC'd - // by the compiler/LLVM. The callback will end up inside the TLS - // callback array pointed to by _TLS_USED through linker shenanigans, - // but as far as the compiler is concerned, it looks like the data is - // unused, so we need this hack to prevent it from disappearing. - unsafe { ptr::from_ref(&CALLBACK).read_volatile() }; + +unsafe fn set(key: Key, ptr: *const c_void) { + let result = unsafe { c::FlsSetValue(key, ptr) }; + + if result == c::FALSE { + rtabort!("failed to set FLS value"); + } +} + +fn is_thread_a_fiber() -> bool { + let res = unsafe { c::IsThreadAFiber() }; + res == c::TRUE } -#[unsafe(link_section = ".CRT$XLB")] -#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section` -pub static CALLBACK: unsafe extern "system" fn(*mut c_void, u32, *mut c_void) = tls_callback; +static KEY: AtomicU32 = AtomicU32::new(FLS_OUT_OF_INDEXES); + +pub fn enable() { + #[thread_local] + static REGISTERED: Cell = Cell::new(false); + + if !REGISTERED.replace(true) { + let current_key = KEY.load(Ordering::Acquire); -unsafe extern "system" fn tls_callback(_h: *mut c_void, dw_reason: u32, _pv: *mut c_void) { - if dw_reason == c::DLL_THREAD_DETACH || dw_reason == c::DLL_PROCESS_DETACH { - unsafe { - #[cfg(target_thread_local)] - super::super::destructors::run(); - #[cfg(not(target_thread_local))] - super::super::key::run_dtors(); + // If we already allocated a key, we only need to set it to a non-null value so that the dtor is run. + let key = if current_key != FLS_OUT_OF_INDEXES { + current_key + } else { + // Otherwise, we try to allocate a key. + let new_key = unsafe { create(Some(cleanup)) }; - crate::rt::thread_cleanup(); - } + // Now we need to set this key to be used by everyone else. + // If we won the race, our key is the right one and we can set it to non-null value. + // If we lost, we'll use the winning key. + // Note: we are not freeing our losing key since according to the docs + // > It is expected that DLLs call [the FlsFree] function (if at all) only during DLL_PROCESS_DETACH. + match KEY.compare_exchange(current_key, new_key, Ordering::Release, Ordering::Acquire) { + Ok(_) => new_key, + Err(other_key) => other_key, + } + }; + + // Setting the key's value to non-zero will cause the dtor callback to be called when the thread exits. + // We only set the key once per thread, so the destructors are guaranteed to run at most once (fibers cannot be moved between threads). + unsafe { set(key, ptr::without_provenance(1)) }; + } +} + +unsafe extern "system" fn cleanup(_ptr: *const c_void) { + // Avoid running the hook if we are in a fiber. + // This will cause destructors of thread locals to not run, leaking them. + // Thread-local runtime state will not be cleaned. + // + // We need to verify that we won't run the destructors *before* the thread exits, + // but if the fiber that registered the callback is deleted, the thread might still be running other fibers. + // + // By checking that we are not running in a fiber here, we are guaranteed that the hook is only running during the thread's exit. + // See also the `fiber_does_not_trigger_dtor` test. + if is_thread_a_fiber() { + return; } + + unsafe { + #[cfg(target_thread_local)] + super::super::destructors::run(); + #[cfg(not(target_thread_local))] + super::super::key::run_dtors(); + } + + crate::rt::thread_cleanup(); } diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index 1318d8dc27809..b252a6e8489ae 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -98,17 +98,11 @@ use crate::fmt; /// run on the thread that causes the process to exit. This is because the /// other threads may be forcibly terminated. /// -/// ## Synchronization in thread-local destructors +/// If a thread is [converted into a fiber], destructors will not be run unless +/// the fiber is [converted back into a thread] before the underlying thread exits. /// -/// On Windows, synchronization operations (such as [`JoinHandle::join`]) in -/// thread local destructors are prone to deadlocks and so should be avoided. -/// This is because the [loader lock] is held while a destructor is run. The -/// lock is acquired whenever a thread starts or exits or when a DLL is loaded -/// or unloaded. Therefore these events are blocked for as long as a thread -/// local destructor is running. -/// -/// [loader lock]: https://docs.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices -/// [`JoinHandle::join`]: crate::thread::JoinHandle::join +/// [converted into a fiber]: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-convertthreadtofiber +/// [converted back into a thread]: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-convertfibertothread /// [`with`]: LocalKey::with #[cfg_attr(not(test), rustc_diagnostic_item = "LocalKey")] #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/tests/thread_local/tests.rs b/library/std/tests/thread_local/tests.rs index 5df1a0e25ee51..a72b31879f370 100644 --- a/library/std/tests/thread_local/tests.rs +++ b/library/std/tests/thread_local/tests.rs @@ -21,6 +21,12 @@ impl Signal { set = cvar.wait(set).unwrap(); } } + + fn is_set(&self) -> bool { + let (set, _cvar) = &*self.0; + let set = set.lock().unwrap(); + *set + } } struct NotifyOnDrop(Signal); @@ -97,6 +103,7 @@ fn smoke_dtor() { }); }); signal.wait(); + assert!(signal.is_set()); t.join().unwrap(); } } @@ -393,3 +400,51 @@ fn thread_current_in_dtor() { let name = name.as_ref().unwrap(); assert_eq!(name, "test"); } + +// Test that if a thread uses fibers while the dtors callback is running, +// we do NOT trigger dtors. +// This prevents a UAF if a fiber is the first to use Tls and is deleted, +// like in the example here: +// https://github.com/rust-lang/rust/pull/148799#issuecomment-3731806901 +#[cfg(target_os = "windows")] +#[test] +fn fiber_does_not_trigger_dtor() { + use core::ffi::c_void; + use std::ptr; + + unsafe extern "system" { + fn ConvertFiberToThread() -> i32; + fn ConvertThreadToFiber(lpParameter: *const c_void) -> *mut c_void; + } + + thread_local!(static FOO: UnsafeCell> = UnsafeCell::new(None)); + let signal = Signal::default(); + + let signal2 = signal.clone(); + let t = thread::spawn(move || unsafe { + let mut signal = Some(signal2); + FOO.with(|f| { + *f.get() = Some(NotifyOnDrop(signal.take().unwrap())); + }); + let _main = ConvertThreadToFiber(ptr::null()); + // A user can then switch to a new fiber and delete the main fiber. + // If destructors are triggered when the fiber is deleted, + // the new fiber will be able to observe already-destructed values. + }); + t.join().unwrap(); + assert!(!signal.is_set()); + + // As long as we stop using fibers before thread teardown, everything works as expected. + let signal2 = signal.clone(); + let t = thread::spawn(move || unsafe { + let mut signal = Some(signal2); + FOO.with(|f| { + *f.get() = Some(NotifyOnDrop(signal.take().unwrap())); + }); + let _ = ConvertThreadToFiber(ptr::null()); + let _ = ConvertFiberToThread(); + }); + signal.wait(); + assert!(signal.is_set()); + t.join().unwrap(); +} From 5f5445dd9d63f7cf976208d797cc1e83061ad5fd Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Fri, 23 Jan 2026 07:23:18 -0800 Subject: [PATCH 3/5] Update fibers tls test to show that using tls for the first time in a fiber works --- library/std/tests/thread_local/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/tests/thread_local/tests.rs b/library/std/tests/thread_local/tests.rs index a72b31879f370..7324b880af67d 100644 --- a/library/std/tests/thread_local/tests.rs +++ b/library/std/tests/thread_local/tests.rs @@ -438,10 +438,10 @@ fn fiber_does_not_trigger_dtor() { let signal2 = signal.clone(); let t = thread::spawn(move || unsafe { let mut signal = Some(signal2); + let _ = ConvertThreadToFiber(ptr::null()); FOO.with(|f| { *f.get() = Some(NotifyOnDrop(signal.take().unwrap())); }); - let _ = ConvertThreadToFiber(ptr::null()); let _ = ConvertFiberToThread(); }); signal.wait(); From 1721bffb4929a38cf842d5a53ba2a5d1c04a5f36 Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Mon, 19 Jan 2026 20:53:26 +0200 Subject: [PATCH 4/5] Add support for the `Fls{Alloc,GetValue,SetValue}` functions in miri --- src/tools/miri/src/shims/tls.rs | 99 +++++++++++++- .../miri/src/shims/windows/foreign_items.rs | 63 +++++++++ src/tools/miri/tests/pass/tls/windows-tls.rs | 125 +++++++++++++++++- .../miri/tests/pass/tls/windows-tls.stdout | 7 + 4 files changed, 283 insertions(+), 11 deletions(-) create mode 100644 src/tools/miri/tests/pass/tls/windows-tls.stdout diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index 3ecd9b1ead38e..4df9100ba1fde 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -1,7 +1,7 @@ //! Implement thread-local storage. -use std::collections::BTreeMap; use std::collections::btree_map::Entry as BTreeEntry; +use std::collections::{BTreeMap, VecDeque}; use std::task::Poll; use rustc_abi::{ExternAbi, HasDataLayout, Size}; @@ -221,9 +221,11 @@ enum TlsDtorsStatePriv<'tcx> { Init, MacOsDtors, PthreadDtors(RunningDtorState), - /// For Windows Dtors, we store the list of functions that we still have to call. - /// These are functions from the magic `.CRT$XLB` linker section. - WindowsDtors(Vec<(ImmTy<'tcx>, Span)>), + /// For Windows, we support two different ways dtors can be registered. + /// 1. Functions that are registered via the `FlsAlloc` function, which are invoked one by one. + /// 2. Functions from the magic `.CRT$XLB` linker section. + /// We store these as a list of functions that we still have to call. + WindowsDtors(VecDeque, Vec<(ImmTy<'tcx>, Span)>), Done, } @@ -250,8 +252,13 @@ impl<'tcx> TlsDtorsState<'tcx> { Os::Windows => { // Determine which destructors to run. let dtors = this.lookup_windows_tls_dtors()?; + + // Fetch fls keys that have destructors. Keys registered during thread exit will not have their destructor called. + // See also: `schedule_next_windows_fls_dtor`. + let fls_keys_with_dtors = this.lookup_windows_fls_keys_with_dtors()?; + // And move to the next state, that runs them. - break 'new_state WindowsDtors(dtors); + break 'new_state WindowsDtors(fls_keys_with_dtors, dtors); } _ => { // No TLS dtor support. @@ -273,7 +280,13 @@ impl<'tcx> TlsDtorsState<'tcx> { Poll::Ready(()) => break 'new_state Done, } } - WindowsDtors(dtors) => { + WindowsDtors(state, dtors) => { + // Fls destructors are scheduled before the tls callback. + match this.schedule_next_windows_fls_dtor(state)? { + Poll::Pending => return interp_ok(Poll::Pending), // just keep going + Poll::Ready(()) => {} + } + if let Some((dtor, span)) = dtors.pop() { this.schedule_windows_tls_dtor(dtor, span)?; return interp_ok(Poll::Pending); // we stay in this state (but `dtors` got shorter) @@ -306,6 +319,22 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?) } + /// Lookup all the FLS keys (which are stored as TLS keys) that have a destructor. + /// See also: `schedule_next_windows_fls_dtor`. + fn lookup_windows_fls_keys_with_dtors(&mut self) -> InterpResult<'tcx, VecDeque> { + let this = self.eval_context_mut(); + + interp_ok( + this.machine + .tls + .keys + .iter() + .filter(|(_, data)| data.dtor.is_some()) + .map(|(key, _)| *key) + .collect(), + ) + } + fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>, span: Span) -> InterpResult<'tcx> { let this = self.eval_context_mut(); @@ -392,4 +421,62 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(Poll::Ready(())) } + + /// Schedule a Windows FLS destructor, if one is found. + fn schedule_next_windows_fls_dtor( + &mut self, + remaining_keys: &mut VecDeque, + ) -> InterpResult<'tcx, Poll<()>> { + let this = self.eval_context_mut(); + let active_thread = this.active_thread(); + + // According to [PflsCallbackFunction's docs], + // > If the FLS slot is in use, `FlsCallback`` is called on .. thread exit .. + // However, the exact order and semantics are not defined. + // We use the following implementation, which matches both observed behavior and + // Wine's implementation of the same logic in the [`RtlProcessFlsData`] function. + // 1. Fetch all the keys once (in `lookup_windows_fls_keys_with_dtors`). + // 2. Go over them one by one, in order. + // 3. Fetch the value associated with the key. + // 4. If it is non-zero, call the registered dtor. + // New keys registered during thread exit are ignored, but values set before the dtor is scheduled are visible. + // [PflsCallbackFunction's docs]: https://learn.microsoft.com/en-us/windows/win32/api/winnt/nc-winnt-pfls_callback_function + // [`RtlProcessFlsData`]: https://github.com/wine-mirror/wine/blob/wine-11.0/dlls/ntdll/thread.c#L679 + while let Some(key) = remaining_keys.pop_front() { + // Fetch dtor for this `key`. + // If the key doesn't have a dtor, move on to the next key. + let (data, dtor) = match this.machine.tls.keys.get(&key) { + Some(TlsEntry { data, dtor: Some(dtor) }) => (data, dtor), + _ => continue, + }; + + let (instance, span) = dtor.to_owned(); + + // If the key has no value in this thread, move on to the next key. + let ptr = match data.get(&active_thread) { + Some(data_scalar) => *data_scalar, + None => continue, + }; + + assert!( + ptr.to_target_usize(this).unwrap() != 0, + "TLS key's value can't be null (should be absent instead)" + ); + + trace!("Running TLS dtor {:?} on {:?} at {:?}", instance, ptr, active_thread); + + this.call_thread_root_function( + instance, + ExternAbi::System { unwind: false }, + &[ImmTy::from_scalar(ptr, this.machine.layouts.mut_raw_ptr)], + None, + span, + )?; + + return interp_ok(Poll::Pending); + } + + // We are done scheduling all the keys. + interp_ok(Poll::Ready(())) + } } diff --git a/src/tools/miri/src/shims/windows/foreign_items.rs b/src/tools/miri/src/shims/windows/foreign_items.rs index 0bdf6bb785056..5bd31da4ffcb8 100644 --- a/src/tools/miri/src/shims/windows/foreign_items.rs +++ b/src/tools/miri/src/shims/windows/foreign_items.rs @@ -669,6 +669,69 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Return success (`1`). this.write_int(1, dest)?; } + + // Fiber-local storage - similar to TLS but supports destructors. + "FlsAlloc" => { + // Create key and return it. + let [dtor] = this.check_shim_sig( + shim_sig!(extern "system" fn(winapi::PFLS_CALLBACK_FUNCTION) -> u32), + link_name, + abi, + args, + )?; + let dtor = this.read_pointer(dtor)?; + + // Extract the function type out of the signature (that seems easier than constructing it ourselves). + let dtor = if !this.ptr_is_null(dtor)? { + Some(( + this.get_ptr_fn(dtor)?.as_instance()?, + this.machine.current_user_relevant_span(), + )) + } else { + None + }; + + let key = this.machine.tls.create_tls_key(dtor, dest.layout.size)?; + this.write_scalar(Scalar::from_uint(key, dest.layout.size), dest)?; + } + "FlsGetValue" => { + let [key] = this.check_shim_sig( + shim_sig!(extern "system" fn(u32) -> *mut _), + link_name, + abi, + args, + )?; + let key = u128::from(this.read_scalar(key)?.to_u32()?); + let active_thread = this.active_thread(); + let ptr = this.machine.tls.load_tls(key, active_thread, this)?; + this.write_scalar(ptr, dest)?; + } + "FlsSetValue" => { + let [key, new_ptr] = this.check_shim_sig( + shim_sig!(extern "system" fn(u32, *mut _) -> winapi::BOOL), + link_name, + abi, + args, + )?; + let key = u128::from(this.read_scalar(key)?.to_u32()?); + let active_thread = this.active_thread(); + let new_data = this.read_scalar(new_ptr)?; + this.machine.tls.store_tls(key, active_thread, new_data, &*this.tcx)?; + + // Return success (`1`). + this.write_int(1, dest)?; + } + "IsThreadAFiber" => { + let [] = this.check_shim_sig( + shim_sig!(extern "system" fn() -> winapi::BOOL), + link_name, + abi, + args, + )?; + + // Return FALSE, as Miri does not support fibers. + this.write_int(0, dest)?; + } // Access to command-line arguments "GetCommandLineW" => { diff --git a/src/tools/miri/tests/pass/tls/windows-tls.rs b/src/tools/miri/tests/pass/tls/windows-tls.rs index 9c5e8eea4d3f2..4b463ca04af32 100644 --- a/src/tools/miri/tests/pass/tls/windows-tls.rs +++ b/src/tools/miri/tests/pass/tls/windows-tls.rs @@ -1,18 +1,133 @@ //@only-target: windows # this directly tests windows-only functions use std::ffi::c_void; -use std::ptr; +use std::{ptr, thread}; + +pub type BOOL = i32; +pub const FALSE: BOOL = 0i32; +pub const TRUE: BOOL = 1i32; extern "system" { fn TlsAlloc() -> u32; - fn TlsSetValue(key: u32, val: *mut c_void) -> i32; + fn TlsSetValue(key: u32, val: *mut c_void) -> BOOL; fn TlsGetValue(key: u32) -> *mut c_void; - fn TlsFree(key: u32) -> i32; + fn TlsFree(key: u32) -> BOOL; + + fn FlsAlloc(lpcallback: Option) -> u32; + fn FlsSetValue(key: u32, val: *mut c_void) -> BOOL; + fn FlsGetValue(key: u32) -> *mut c_void; + + fn IsThreadAFiber() -> BOOL; +} + +extern "system" fn dtor_unreachable(_val: *mut c_void) { + unreachable!() +} + +fn fls_0_zero_value_does_not_run() { + let key = unsafe { FlsAlloc(Some(dtor_unreachable)) }; + + assert_eq!(unsafe { FlsSetValue(key, ptr::without_provenance_mut(1)) }, TRUE); + assert_eq!(unsafe { FlsGetValue(key).addr() }, 1); + assert_eq!(unsafe { FlsSetValue(key, ptr::without_provenance_mut(0)) }, TRUE); +} + +fn fls_1_dtor_simple() { + extern "system" fn dtor(val: *mut c_void) { + assert_eq!(val.addr(), 1); + println!("fls_1_dtor_simple"); + } + + let key = unsafe { FlsAlloc(Some(dtor)) }; + assert_eq!(unsafe { FlsSetValue(key, ptr::without_provenance_mut(1)) }, TRUE); + assert_eq!(unsafe { FlsGetValue(key).addr() }, 1); +} + +fn fls_2_dtor_update_value_ignored() { + extern "system" fn early_dtor(_val: *mut c_void) { + println!("fls_2.1_early_dtor"); + } + + extern "system" fn later_dtor(val: *mut c_void) { + println!("fls_2.2_later_dtor"); + + // Updating a different fls slot's value doesn't cause their dtor to run, if it already did. + let early_key = val as u32; + assert_eq!( + unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(1)) }, + TRUE + ); + + // Registering new fls slots doesn't cause their dtor to run. + let a_new_key_in_dtor = unsafe { FlsAlloc(Some(dtor_unreachable)) }; + assert_eq!(unsafe { FlsSetValue(a_new_key_in_dtor, ptr::without_provenance_mut(1)) }, TRUE); + } + + let early_key = unsafe { FlsAlloc(Some(early_dtor)) }; + let later_key = unsafe { FlsAlloc(Some(later_dtor)) }; + assert_eq!(unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(1)) }, TRUE); + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(early_key as usize)) }, TRUE); +} + +fn fls_3_dtor_update_value_skipped_ignored() { + extern "system" fn later_dtor(val: *mut c_void) { + println!("fls_3_later_dtor"); + + // Updating a different fls slot's value doesn't cause their dtor to run, if it was already skipped. + let early_key = val as u32; + assert_eq!( + unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(1)) }, + TRUE + ); + } + + let early_key = unsafe { FlsAlloc(Some(dtor_unreachable)) }; + let later_key = unsafe { FlsAlloc(Some(later_dtor)) }; + assert_eq!(unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(0)) }, TRUE); + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(early_key as usize)) }, TRUE); +} + +fn fls_4_dtor_update_value_used() { + extern "system" fn early_dtor(val: *mut c_void) { + println!("fls_4.1_early_dtor"); + + // Updating a different fls slot's value affect their dtor, if it hasn't yet run. + let later_key = val as u32; + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(1)) }, TRUE); + } + + extern "system" fn later_dtor(_val: *mut c_void) { + println!("fls_4.2_later_dtor"); + } + + let early_key = unsafe { FlsAlloc(Some(early_dtor)) }; + let later_key = unsafe { FlsAlloc(Some(later_dtor)) }; + assert_eq!(unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(later_key as usize)) }, TRUE); + + // Setting to zero explicitly, dtor won't rub unless `early_dtor` changes this value. + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(0)) }, TRUE); +} + +fn fls() { + assert_eq!(unsafe { IsThreadAFiber() }, FALSE); + + fls_0_zero_value_does_not_run(); + fls_1_dtor_simple(); + fls_2_dtor_update_value_ignored(); + fls_3_dtor_update_value_skipped_ignored(); + fls_4_dtor_update_value_used(); } fn main() { let key = unsafe { TlsAlloc() }; - assert!(unsafe { TlsSetValue(key, ptr::without_provenance_mut(1)) != 0 }); + assert_eq!(unsafe { TlsSetValue(key, ptr::without_provenance_mut(1)) }, TRUE); assert_eq!(unsafe { TlsGetValue(key).addr() }, 1); - assert!(unsafe { TlsFree(key) != 0 }); + assert_eq!(unsafe { TlsFree(key) }, TRUE); + + thread::spawn(|| { + fls(); + println!("exiting thread"); + }) + .join() + .unwrap(); } diff --git a/src/tools/miri/tests/pass/tls/windows-tls.stdout b/src/tools/miri/tests/pass/tls/windows-tls.stdout new file mode 100644 index 0000000000000..16ff416828854 --- /dev/null +++ b/src/tools/miri/tests/pass/tls/windows-tls.stdout @@ -0,0 +1,7 @@ +exiting thread +fls_1_dtor_simple +fls_2.1_early_dtor +fls_2.2_later_dtor +fls_3_later_dtor +fls_4.1_early_dtor +fls_4.2_later_dtor From 66af8be05b364c91853090fa4c963a30c88b26e5 Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Fri, 23 Jan 2026 09:57:17 -0800 Subject: [PATCH 5/5] Update Miri's Fls impl to clear the key's value after running it's destructor --- src/tools/miri/src/shims/tls.rs | 40 +++++++++++++---- src/tools/miri/tests/pass/tls/windows-tls.rs | 44 ++++++++++++++++--- .../miri/tests/pass/tls/windows-tls.stdout | 1 + 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index 4df9100ba1fde..f67e45caf9317 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -22,13 +22,22 @@ pub struct TlsEntry<'tcx> { } #[derive(Default, Debug)] -struct RunningDtorState { +struct RunningPthreadDtorState { /// The last TlsKey used to retrieve a TLS destructor. `None` means that we /// have not tried to retrieve a TLS destructor yet or that we already tried /// all keys. last_key: Option, } +#[derive(Default, Debug)] +struct RunningWindowsDtorState { + /// The last TlsKey for which a TLS destructor ran. `None` means that we + /// have not run a TLS destructor yet. This is used to clear the TLS value after the dtor returned. + last_key: Option, + // Keys that have destructors that we still need to run. + remaining_keys: VecDeque, +} + #[derive(Debug)] pub struct TlsData<'tcx> { /// The Key to use for the next thread-local allocation. @@ -220,12 +229,12 @@ enum TlsDtorsStatePriv<'tcx> { #[default] Init, MacOsDtors, - PthreadDtors(RunningDtorState), + PthreadDtors(RunningPthreadDtorState), /// For Windows, we support two different ways dtors can be registered. /// 1. Functions that are registered via the `FlsAlloc` function, which are invoked one by one. /// 2. Functions from the magic `.CRT$XLB` linker section. /// We store these as a list of functions that we still have to call. - WindowsDtors(VecDeque, Vec<(ImmTy<'tcx>, Span)>), + WindowsDtors(RunningWindowsDtorState, Vec<(ImmTy<'tcx>, Span)>), Done, } @@ -258,7 +267,7 @@ impl<'tcx> TlsDtorsState<'tcx> { let fls_keys_with_dtors = this.lookup_windows_fls_keys_with_dtors()?; // And move to the next state, that runs them. - break 'new_state WindowsDtors(fls_keys_with_dtors, dtors); + break 'new_state WindowsDtors(RunningWindowsDtorState { last_key: None, remaining_keys: fls_keys_with_dtors }, dtors); } _ => { // No TLS dtor support. @@ -389,7 +398,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// a destructor to schedule, and `false` otherwise. fn schedule_next_pthread_tls_dtor( &mut self, - state: &mut RunningDtorState, + state: &mut RunningPthreadDtorState, ) -> InterpResult<'tcx, Poll<()>> { let this = self.eval_context_mut(); let active_thread = this.active_thread(); @@ -425,7 +434,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Schedule a Windows FLS destructor, if one is found. fn schedule_next_windows_fls_dtor( &mut self, - remaining_keys: &mut VecDeque, + state: &mut RunningWindowsDtorState, ) -> InterpResult<'tcx, Poll<()>> { let this = self.eval_context_mut(); let active_thread = this.active_thread(); @@ -436,15 +445,25 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // We use the following implementation, which matches both observed behavior and // Wine's implementation of the same logic in the [`RtlProcessFlsData`] function. // 1. Fetch all the keys once (in `lookup_windows_fls_keys_with_dtors`). - // 2. Go over them one by one, in order. + // 2. Go over them one by one, in order, skipping keys without dtors. // 3. Fetch the value associated with the key. // 4. If it is non-zero, call the registered dtor. + // 5. After the dtor is called, clear the key's value, setting it to zero. // New keys registered during thread exit are ignored, but values set before the dtor is scheduled are visible. + // Keys without dtors will not be set to zero. // [PflsCallbackFunction's docs]: https://learn.microsoft.com/en-us/windows/win32/api/winnt/nc-winnt-pfls_callback_function // [`RtlProcessFlsData`]: https://github.com/wine-mirror/wine/blob/wine-11.0/dlls/ntdll/thread.c#L679 - while let Some(key) = remaining_keys.pop_front() { + + // We are done running the last key's destructor, so set it's value to zero. + if let Some(last_key) = state.last_key.take() { + if let Some(TlsEntry { data, .. }) = this.machine.tls.keys.get_mut(&last_key) { + data.remove(&active_thread); + }; + } + + while let Some(key) = state.remaining_keys.pop_front() { // Fetch dtor for this `key`. - // If the key doesn't have a dtor, move on to the next key. + // If the key doesn't have a dtor or does not exist any more, move on to the next key. let (data, dtor) = match this.machine.tls.keys.get(&key) { Some(TlsEntry { data, dtor: Some(dtor) }) => (data, dtor), _ => continue, @@ -465,6 +484,9 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { trace!("Running TLS dtor {:?} on {:?} at {:?}", instance, ptr, active_thread); + // We'll clear this key's value next time we are called. + state.last_key = Some(key); + this.call_thread_root_function( instance, ExternAbi::System { unwind: false }, diff --git a/src/tools/miri/tests/pass/tls/windows-tls.rs b/src/tools/miri/tests/pass/tls/windows-tls.rs index 4b463ca04af32..b72909ac1dc51 100644 --- a/src/tools/miri/tests/pass/tls/windows-tls.rs +++ b/src/tools/miri/tests/pass/tls/windows-tls.rs @@ -34,17 +34,28 @@ fn fls_0_zero_value_does_not_run() { fn fls_1_dtor_simple() { extern "system" fn dtor(val: *mut c_void) { - assert_eq!(val.addr(), 1); + assert!(!val.is_null()); println!("fls_1_dtor_simple"); + + // Keys are freed in-order. Without a dtor, the early key's value is not zeroed out. + let early_key = val as u32; + assert_eq!(unsafe { FlsGetValue(early_key).addr() }, 1); } - let key = unsafe { FlsAlloc(Some(dtor)) }; - assert_eq!(unsafe { FlsSetValue(key, ptr::without_provenance_mut(1)) }, TRUE); - assert_eq!(unsafe { FlsGetValue(key).addr() }, 1); + let early_key = unsafe { FlsAlloc(None) }; + let later_key = unsafe { FlsAlloc(Some(dtor)) }; + + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(1)) }, TRUE); + assert_eq!(unsafe { FlsGetValue(later_key).addr() }, 1); + + assert_eq!(unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(1)) }, TRUE); + // Will be used in the dtor to check early_key's value. + assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(early_key as usize)) }, TRUE); } fn fls_2_dtor_update_value_ignored() { - extern "system" fn early_dtor(_val: *mut c_void) { + extern "system" fn early_dtor(val: *mut c_void) { + assert!(!val.is_null()); println!("fls_2.1_early_dtor"); } @@ -53,6 +64,13 @@ fn fls_2_dtor_update_value_ignored() { // Updating a different fls slot's value doesn't cause their dtor to run, if it already did. let early_key = val as u32; + + // After the early key's dtor run, the key's value is zeroed out. + assert_eq!( + unsafe { FlsGetValue(early_key).addr() }, + 0 + ); + assert_eq!( unsafe { FlsSetValue(early_key, ptr::without_provenance_mut(1)) }, TRUE @@ -108,6 +126,21 @@ fn fls_4_dtor_update_value_used() { assert_eq!(unsafe { FlsSetValue(later_key, ptr::without_provenance_mut(0)) }, TRUE); } +fn fls_5_dtor_value() { + extern "system" fn dtor(val: *mut c_void) { + assert!(!val.is_null()); + println!("fls_5_dtor_value"); + + // When the key's dtor run, the key's value equals the destructor argument. + let key = val as u32; + assert_eq!(unsafe { FlsGetValue(key) }, val); + } + + let key = unsafe { FlsAlloc(Some(dtor)) }; + + assert_eq!(unsafe { FlsSetValue(key, ptr::without_provenance_mut(key as usize)) }, TRUE); +} + fn fls() { assert_eq!(unsafe { IsThreadAFiber() }, FALSE); @@ -116,6 +149,7 @@ fn fls() { fls_2_dtor_update_value_ignored(); fls_3_dtor_update_value_skipped_ignored(); fls_4_dtor_update_value_used(); + fls_5_dtor_value(); } fn main() { diff --git a/src/tools/miri/tests/pass/tls/windows-tls.stdout b/src/tools/miri/tests/pass/tls/windows-tls.stdout index 16ff416828854..c287aaf23d002 100644 --- a/src/tools/miri/tests/pass/tls/windows-tls.stdout +++ b/src/tools/miri/tests/pass/tls/windows-tls.stdout @@ -5,3 +5,4 @@ fls_2.2_later_dtor fls_3_later_dtor fls_4.1_early_dtor fls_4.2_later_dtor +fls_5_dtor_value