git: 74886657a8d4 - stable/13 - Merge libcxxrt master fd484be8d1e94a1fcf6bc5c67e5c07b65ada19b6

From: Dimitry Andric <dim_at_FreeBSD.org>
Date: Wed, 23 Mar 2022 20:04:51 UTC
The branch stable/13 has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=74886657a8d4d811ef2c06303a42950cedd2f367

commit 74886657a8d4d811ef2c06303a42950cedd2f367
Author:     Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2022-03-09 19:45:01 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2022-03-23 20:03:21 +0000

    Merge libcxxrt master fd484be8d1e94a1fcf6bc5c67e5c07b65ada19b6
    
    Interesting fixes:
    47661d0 Match libc++abi/libsupc++ when demangling array types
    e44a05c Fix unitialized variable in __cxa_demangle_gnu3 after #6 (#8)
    5088b05 Remove some code duplication.
    fd484be Atomics cleanup (#11)
    
    MFC after:      2 weeks
    
    (cherry picked from commit 56aaed388b0a177b9defb4e6ecb8a716abb3cf33)
---
 contrib/libcxxrt/atomic.h            | 110 +++++++++--
 contrib/libcxxrt/auxhelper.cc        |  25 ++-
 contrib/libcxxrt/exception.cc        |  21 +-
 contrib/libcxxrt/guard.cc            | 368 +++++++++++++++++++++++++----------
 contrib/libcxxrt/libelftc_dem_gnu3.c |   6 +-
 contrib/libcxxrt/memory.cc           |  54 +++--
 contrib/libcxxrt/noexception.cc      |  45 +++++
 7 files changed, 475 insertions(+), 154 deletions(-)

diff --git a/contrib/libcxxrt/atomic.h b/contrib/libcxxrt/atomic.h
index 131ca9f57798..701d05337cf1 100644
--- a/contrib/libcxxrt/atomic.h
+++ b/contrib/libcxxrt/atomic.h
@@ -1,30 +1,102 @@
 
 #ifndef __has_builtin
-#define __has_builtin(x) 0
+#	define __has_builtin(x) 0
 #endif
 #ifndef __has_feature
-#define __has_feature(x) 0
+#	define __has_feature(x) 0
+#endif
+#ifndef __has_extension
+#	define __has_extension(x) 0
+#endif
+
+#if !__has_extension(c_atomic)
+#	define _Atomic(T) T
 #endif
-/**
- * Swap macro that enforces a happens-before relationship with a corresponding
- * ATOMIC_LOAD.
- */
 #if __has_builtin(__c11_atomic_exchange)
-#define ATOMIC_SWAP(addr, val)\
-	__c11_atomic_exchange(reinterpret_cast<_Atomic(__typeof__(val))*>(addr), val, __ATOMIC_ACQ_REL)
-#elif __has_builtin(__sync_swap)
-#define ATOMIC_SWAP(addr, val)\
-	__sync_swap(addr, val)
+#	define ATOMIC_BUILTIN(name) __c11_atomic_##name
 #else
-#define ATOMIC_SWAP(addr, val)\
-	__sync_lock_test_and_set(addr, val)
+#	define ATOMIC_BUILTIN(name) __atomic_##name##_n
 #endif
 
-#if __has_builtin(__c11_atomic_load)
-#define ATOMIC_LOAD(addr)\
-	__c11_atomic_load(reinterpret_cast<_Atomic(__typeof__(*addr))*>(addr), __ATOMIC_ACQUIRE)
+namespace
+{
+	/**
+	 * C++11 memory orders.  We only need a subset of them.
+	 */
+	enum memory_order
+	{
+		/**
+		 * Acquire order.
+		 */
+		acquire = __ATOMIC_ACQUIRE,
+
+		/**
+		 * Release order.
+		 */
+		release = __ATOMIC_RELEASE,
+
+		/**
+		 * Sequentially consistent memory ordering.
+		 */
+		seqcst = __ATOMIC_SEQ_CST
+	};
+
+	/**
+	 * Atomic, implements a subset of `std::atomic`.
+	 */
+	template<typename T>
+	class atomic
+	{
+		/**
+		 * The underlying value.  Use C11 atomic qualification if available.
+		 */
+		_Atomic(T) val;
+
+		public:
+		/**
+		 * Constructor, takes a value.
+		 */
+		atomic(T init) : val(init) {}
+
+		/**
+		 * Atomically load with the specified memory order.
+		 */
+		T load(memory_order order = memory_order::seqcst)
+		{
+			return ATOMIC_BUILTIN(load)(&val, order);
+		}
+
+		/**
+		 * Atomically store with the specified memory order.
+		 */
+		void store(T v, memory_order order = memory_order::seqcst)
+		{
+			return ATOMIC_BUILTIN(store)(&val, v, order);
+		}
+
+		/**
+		 * Atomically exchange with the specified memory order.
+		 */
+		T exchange(T v, memory_order order = memory_order::seqcst)
+		{
+			return ATOMIC_BUILTIN(exchange)(&val, v, order);
+		}
+
+		/**
+		 * Atomically compare and exchange with the specified memory order.
+		 */
+		bool compare_exchange(T &          expected,
+		                      T            desired,
+		                      memory_order order = memory_order::seqcst)
+		{
+#if __has_builtin(__c11_atomic_compare_exchange_strong)
+			return __c11_atomic_compare_exchange_strong(
+			  &val, &expected, desired, order, order);
 #else
-#define ATOMIC_LOAD(addr)\
-	(__sync_synchronize(), *addr)
+			return __atomic_compare_exchange_n(
+			  &val, &expected, desired, true, order, order);
 #endif
-
+		}
+	};
+} // namespace
+#undef ATOMIC_BUILTIN
diff --git a/contrib/libcxxrt/auxhelper.cc b/contrib/libcxxrt/auxhelper.cc
index 3e98da036a11..74b998eef241 100644
--- a/contrib/libcxxrt/auxhelper.cc
+++ b/contrib/libcxxrt/auxhelper.cc
@@ -34,6 +34,21 @@
 #include <stdlib.h>
 #include "stdexcept.h"
 
+namespace {
+	/**
+	 * Throw an exception if we're compiling with exceptions, otherwise abort.
+	 */
+	template<typename T>
+	void throw_exception()
+	{
+#if !defined(_CXXRT_NO_EXCEPTIONS)
+		throw T();
+#else
+		abort();
+#endif
+	}
+}
+
 /**
  * Called to generate a bad cast exception.  This function is intended to allow
  * compilers to insert code generating this exception without needing to
@@ -41,7 +56,7 @@
  */
 extern "C" void __cxa_bad_cast()
 {
-    throw std::bad_cast();
+	throw_exception<std::bad_cast>();
 }
 
 /**
@@ -51,7 +66,7 @@ extern "C" void __cxa_bad_cast()
  */
 extern "C" void __cxa_bad_typeid()
 {
-    throw std::bad_typeid();
+	throw_exception<std::bad_typeid>();
 }
 
 /**
@@ -62,7 +77,7 @@ extern "C" void __cxa_bad_typeid()
  */
 extern "C" void __cxa_pure_virtual()
 {
-    abort();
+	abort();
 }
 
 /**
@@ -73,10 +88,10 @@ extern "C" void __cxa_pure_virtual()
  */
 extern "C" void __cxa_deleted_virtual()
 {
-    abort();
+	abort();
 }
 
 extern "C" void __cxa_throw_bad_array_new_length()
 {
-	throw std::bad_array_new_length();
+	throw_exception<std::bad_array_new_length>();
 }
diff --git a/contrib/libcxxrt/exception.cc b/contrib/libcxxrt/exception.cc
index 0fb26ddb4ed2..2f1dc4030ba4 100644
--- a/contrib/libcxxrt/exception.cc
+++ b/contrib/libcxxrt/exception.cc
@@ -1,5 +1,6 @@
 /* 
  * Copyright 2010-2011 PathScale, Inc. All rights reserved.
+ * Copyright 2021 David Chisnall. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -289,9 +290,9 @@ using namespace ABI_NAMESPACE;
 
 
 /** The global termination handler. */
-static terminate_handler terminateHandler = abort;
+static atomic<terminate_handler> terminateHandler = abort;
 /** The global unexpected exception handler. */
-static unexpected_handler unexpectedHandler = std::terminate;
+static atomic<unexpected_handler> unexpectedHandler = std::terminate;
 
 /** Key used for thread-local data. */
 static pthread_key_t eh_key;
@@ -744,12 +745,12 @@ static void throw_exception(__cxa_exception *ex)
 	ex->unexpectedHandler = info->unexpectedHandler;
 	if (0 == ex->unexpectedHandler)
 	{
-		ex->unexpectedHandler = unexpectedHandler;
+		ex->unexpectedHandler = unexpectedHandler.load();
 	}
 	ex->terminateHandler  = info->terminateHandler;
 	if (0 == ex->terminateHandler)
 	{
-		ex->terminateHandler = terminateHandler;
+		ex->terminateHandler = terminateHandler.load();
 	}
 	info->globals.uncaughtExceptions++;
 
@@ -1449,7 +1450,7 @@ namespace std
 	{
 		if (thread_local_handlers) { return pathscale::set_unexpected(f); }
 
-		return ATOMIC_SWAP(&unexpectedHandler, f);
+		return unexpectedHandler.exchange(f);
 	}
 	/**
 	 * Sets the function that is called to terminate the program.
@@ -1458,7 +1459,7 @@ namespace std
 	{
 		if (thread_local_handlers) { return pathscale::set_terminate(f); }
 
-		return ATOMIC_SWAP(&terminateHandler, f);
+		return terminateHandler.exchange(f);
 	}
 	/**
 	 * Terminates the program, calling a custom terminate implementation if
@@ -1474,7 +1475,7 @@ namespace std
 			// return.
 			abort();
 		}
-		terminateHandler();
+		terminateHandler.load()();
 	}
 	/**
 	 * Called when an unexpected exception is encountered (i.e. an exception
@@ -1491,7 +1492,7 @@ namespace std
 			// return.
 			abort();
 		}
-		unexpectedHandler();
+		unexpectedHandler.load()();
 	}
 	/**
 	 * Returns whether there are any exceptions currently being thrown that
@@ -1521,7 +1522,7 @@ namespace std
 		{
 			return info->unexpectedHandler;
 		}
-		return ATOMIC_LOAD(&unexpectedHandler);
+		return unexpectedHandler.load();
 	}
 	/**
 	 * Returns the current terminate handler.
@@ -1533,7 +1534,7 @@ namespace std
 		{
 			return info->terminateHandler;
 		}
-		return ATOMIC_LOAD(&terminateHandler);
+		return terminateHandler.load();
 	}
 }
 #if defined(__arm__) && !defined(__ARM_DWARF_EH__)
diff --git a/contrib/libcxxrt/guard.cc b/contrib/libcxxrt/guard.cc
index 34d294cf7432..515992563a10 100644
--- a/contrib/libcxxrt/guard.cc
+++ b/contrib/libcxxrt/guard.cc
@@ -1,5 +1,6 @@
-/* 
+/*
  * Copyright 2010-2012 PathScale, Inc. All rights reserved.
+ * Copyright 2021 David Chisnall. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -10,7 +11,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  *    this list of conditions and the following disclaimer in the documentation
  *    and/or other materials provided with the distribution.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -38,126 +39,305 @@
  * value as a low-overhead lock.  Because statics (in most sane code) are
  * accessed far more times than they are initialised, this lock implementation
  * is heavily optimised towards the case where the static has already been
- * initialised.  
+ * initialised.
  */
+#include "atomic.h"
+#include <assert.h>
+#include <pthread.h>
 #include <stdint.h>
 #include <stdlib.h>
-#include <stdio.h>
-#include <pthread.h>
-#include <assert.h>
-#include "atomic.h"
 
 // Older GCC doesn't define __LITTLE_ENDIAN__
 #ifndef __LITTLE_ENDIAN__
-	// If __BYTE_ORDER__ is defined, use that instead
+// If __BYTE_ORDER__ is defined, use that instead
 #	ifdef __BYTE_ORDER__
 #		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #			define __LITTLE_ENDIAN__
 #		endif
-	// x86 and ARM are the most common little-endian CPUs, so let's have a
-	// special case for them (ARM is already special cased).  Assume everything
-	// else is big endian.
+// x86 and ARM are the most common little-endian CPUs, so let's have a
+// special case for them (ARM is already special cased).  Assume everything
+// else is big endian.
 #	elif defined(__x86_64) || defined(__i386)
 #		define __LITTLE_ENDIAN__
 #	endif
 #endif
 
-
 /*
- * The least significant bit of the guard variable indicates that the object
- * has been initialised, the most significant bit is used for a spinlock.
+ * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)
+ * values, with one bit indicating that the guarded variable is initialised and
+ * another bit indicating that it's currently locked (initialisation in
+ * progress).  The bit to use depends on the byte order of the target.
+ *
+ * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we
+ * treat the two halves of the 64-bit word as independent 32-bit atomic values.
  */
+namespace
+{
+	/**
+	 * The state of the guard variable when an attempt is made to lock it.
+	 */
+	enum class GuardState
+	{
+		/**
+		 * The lock is not held but is not needed because initialisation is
+		 * done.
+		 */
+		InitDone,
+
+		/**
+		 * Initialisation is not done but the lock is held by the caller.
+		 */
+		InitLockSucceeded,
+
+		/**
+		 * Attempting to acquire the lock failed.
+		 */
+		InitLockFailed
+	};
+
+	/**
+	 * Class encapsulating a single atomic word being used to represent the
+	 * guard.  The word size is defined by the type of `GuardWord`.  The bit
+	 * used to indicate the locked state is `1<<LockedBit`, the bit used to
+	 * indicate the initialised state is `1<<InitBit`.
+	 */
+	template<typename GuardWord, int LockedBit, int InitBit>
+	struct SingleWordGuard
+	{
+		/**
+		 * The value indicating that the lock bit is set (and no other bits).
+		 */
+		static constexpr GuardWord locked = static_cast<GuardWord>(1)
+		                                    << LockedBit;
+
+		/**
+		 * The value indicating that the initialised bit is set (and all other
+		 * bits are zero).
+		 */
+		static constexpr GuardWord initialised = static_cast<GuardWord>(1)
+		                                         << InitBit;
+
+		/**
+		 * The guard variable.
+		 */
+		atomic<GuardWord> val;
+
+		public:
+		/**
+		 * Release the lock and set the initialised state.  In the single-word
+		 * implementation here, these are both done by a single store.
+		 */
+		void unlock(bool isInitialised)
+		{
+			val.store(isInitialised ? initialised : 0, memory_order::release);
+#ifndef NDEBUG
+			GuardWord init_state = initialised;
+			assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);
+#endif
+		}
+
+		/**
+		 * Try to acquire the lock.  This has a tri-state return, indicating
+		 * either that the lock was acquired, it wasn't acquired because it was
+		 * contended, or it wasn't acquired because the guarded variable is
+		 * already initialised.
+		 */
+		GuardState try_lock()
+		{
+			GuardWord old = 0;
+			// Try to acquire the lock, assuming that we are in the state where
+			// the lock is not held and the variable is not initialised (so the
+			// expected value is 0).
+			if (val.compare_exchange(old, locked))
+			{
+				return GuardState::InitLockSucceeded;
+			}
+			// If the CAS failed and the old value indicates that this is
+			// initialised, return that initialisation is done and skip further
+			// retries.
+			if (old == initialised)
+			{
+				return GuardState::InitDone;
+			}
+			// Otherwise, report failure.
+			return GuardState::InitLockFailed;
+		}
+
+		/**
+		 * Check whether the guard indicates that the variable is initialised.
+		 */
+		bool is_initialised()
+		{
+			return (val.load(memory_order::acquire) & initialised) ==
+			       initialised;
+		}
+	};
+
+	/**
+	 * Class encapsulating using two 32-bit atomic values to represent a 64-bit
+	 * guard variable.
+	 */
+	template<int LockedBit, int InitBit>
+	class DoubleWordGuard
+	{
+		/**
+		 * The value of `lock_word` when the lock is held.
+		 */
+		static constexpr uint32_t locked = static_cast<uint32_t>(1)
+		                                   << LockedBit;
+
+		/**
+		 * The value of `init_word` when the guarded variable is initialised.
+		 */
+		static constexpr uint32_t initialised = static_cast<uint32_t>(1)
+		                                        << InitBit;
+
+		/**
+		 * The word used for the initialised flag.  This is always the first
+		 * word irrespective of endian because the generated code compares the
+		 * first byte in memory against 0.
+		 */
+		atomic<uint32_t> init_word;
+
+		/**
+		 * The word used for the lock.
+		 */
+		atomic<uint32_t> lock_word;
+
+		public:
+		/**
+		 * Try to acquire the lock.  This has a tri-state return, indicating
+		 * either that the lock was acquired, it wasn't acquired because it was
+		 * contended, or it wasn't acquired because the guarded variable is
+		 * already initialised.
+		 */
+		GuardState try_lock()
+		{
+			uint32_t old = 0;
+			// Try to acquire the lock
+			if (lock_word.compare_exchange(old, locked))
+			{
+				// If we succeeded, check if initialisation has happened.  In
+				// this version, we don't have atomic manipulation of both the
+				// lock and initialised bits together.  Instead, we have an
+				// ordering rule that the initialised bit is only ever updated
+				// with the lock held.
+				if (is_initialised())
+				{
+					// Another thread already initialised the variable: release
+					// the lock by storing 0 (as `unlock` does) so that other
+					// waiters' compare-exchange from 0 can succeed.
+					lock_word.store(0, memory_order::release);
+					return GuardState::InitDone;
+				}
+				return GuardState::InitLockSucceeded;
+			}
+			return GuardState::InitLockFailed;
+		}
+
+		/**
+		 * Set the initialised state and release the lock.  In this
+		 * implementation, this is ordered, not atomic: the initialised bit is
+		 * set while the lock is held.
+		 */
+		void unlock(bool isInitialised)
+		{
+			init_word.store(isInitialised ? initialised : 0,
+			                  memory_order::release);
+			lock_word.store(0, memory_order::release);
+			assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);
+		}
+
+		/**
+		 * Return whether the guarded variable is initialised.
+		 */
+		bool is_initialised()
+		{
+			return (init_word.load(memory_order::acquire) & initialised) ==
+			       initialised;
+		}
+	};
+
+	// Check that the two implementations are the correct size.
+	static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),
+	              "Single-word 32-bit guard must be 32 bits");
+	static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),
+	              "Single-word 64-bit guard must be 64 bits");
+	static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),
+	              "Double-word guard must be 64 bits");
+
 #ifdef __arm__
-// ARM ABI - 32-bit guards.
-typedef uint32_t guard_t;
-typedef uint32_t guard_lock_t;
-static const uint32_t LOCKED = static_cast<guard_t>(1) << 31;
-static const uint32_t INITIALISED = 1;
-#define LOCK_PART(guard) (guard)
-#define INIT_PART(guard) (guard)
+	/**
+	 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.
+	 */
+	using Guard = SingleWordGuard<uint32_t, 31, 0>;
 #elif defined(_LP64)
-typedef uint64_t guard_t;
-typedef uint64_t guard_lock_t;
 #	if defined(__LITTLE_ENDIAN__)
-static const guard_t LOCKED = static_cast<guard_t>(1) << 63;
-static const guard_t INITIALISED = 1;
+	/**
+	 * On little-endian 64-bit platforms the guard word is a single 64-bit
+	 * atomic with the lock in the high bit and the initialised flag in the low
+	 * bit.
+	 */
+	using Guard = SingleWordGuard<uint64_t, 63, 0>;
 #	else
-static const guard_t LOCKED = 1;
-static const guard_t INITIALISED = static_cast<guard_t>(1) << 56;
+	/**
+	 * On big-endian 64-bit platforms, the guard word is a single 64-bit atomic
+	 * with the lock in the low bit and the initialised bit in the highest
+	 * byte.
+	 */
+	using Guard = SingleWordGuard<uint64_t, 0, 56>;
 #	endif
-#define LOCK_PART(guard) (guard)
-#define INIT_PART(guard) (guard)
 #else
-typedef uint32_t guard_lock_t;
 #	if defined(__LITTLE_ENDIAN__)
-typedef struct {
-	uint32_t init_half;
-	uint32_t lock_half;
-} guard_t;
-static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31;
-static const uint32_t INITIALISED = 1;
+	/**
+	 * 32-bit platforms use the same layout as 64-bit.
+	 */
+	using Guard = DoubleWordGuard<31, 0>;
 #	else
-typedef struct {
-	uint32_t init_half;
-	uint32_t lock_half;
-} guard_t;
-static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
-static const uint32_t LOCKED = 1;
-static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
+	/**
+	 * 32-bit platforms use the same layout as 64-bit.
+	 */
+	using Guard = DoubleWordGuard<0, 24>;
 #	endif
-#define LOCK_PART(guard) (&(guard)->lock_half)
-#define INIT_PART(guard) (&(guard)->init_half)
 #endif
-static const guard_lock_t INITIAL = 0;
+
+} // namespace
 
 /**
  * Acquires a lock on a guard, returning 0 if the object has already been
  * initialised, and 1 if it has not.  If the object is already constructed then
  * this function just needs to read a byte from memory and return.
  */
-extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
+extern "C" int __cxa_guard_acquire(Guard *guard_object)
 {
-	guard_lock_t old;
-	// Not an atomic read, doesn't establish a happens-before relationship, but
-	// if one is already established and we end up seeing an initialised state
-	// then it's a fast path, otherwise we'll do something more expensive than
-	// this test anyway...
-	if (INITIALISED == *INIT_PART(guard_object))
+	// Check if this is already initialised.  If so, we don't have to do
+	// anything.
+	if (guard_object->is_initialised())
+	{
 		return 0;
-	// Spin trying to do the initialisation
+	}
+	// Spin trying to acquire the lock.  If we fail to acquire the lock the
+	// first time then another thread will *probably* initialise it, but if the
+	// constructor throws an exception then we may have to try again in this
+	// thread.
 	for (;;)
 	{
-		// Loop trying to move the value of the guard from 0 (not
-		// locked, not initialised) to the locked-uninitialised
-		// position.
-		old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
-		    INITIAL, LOCKED);
-		if (old == INITIAL) {
-			// Lock obtained.  If lock and init bit are
-			// in separate words, check for init race.
-			if (INIT_PART(guard_object) == LOCK_PART(guard_object))
-				return 1;
-			if (INITIALISED != *INIT_PART(guard_object))
+		// Try to acquire the lock.
+		switch (guard_object->try_lock())
+		{
+			// If we failed to acquire the lock but another thread has
+			// initialised the guarded variable while we were waiting, return
+			// immediately indicating that initialisation is not required.
+			case GuardState::InitDone:
+				return 0;
+			// If we acquired the lock, return immediately to start
+			// initialisation.
+			case GuardState::InitLockSucceeded:
 				return 1;
-
-			// No need for a memory barrier here,
-			// see first comment.
-			*LOCK_PART(guard_object) = INITIAL;
-			return 0;
+			// If we didn't acquire the lock, pause and retry.
+			case GuardState::InitLockFailed:
+				break;
 		}
-		// If lock and init bit are in the same word, check again
-		// if we are done.
-		if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
-		    old == INITIALISED)
-			return 0;
-
-		assert(old == LOCKED);
-		// Another thread holds the lock.
-		// If lock and init bit are in different words, check
-		// if we are done before yielding and looping.
-		if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
-		    INITIALISED == *INIT_PART(guard_object))
-			return 0;
 		sched_yield();
 	}
 }
@@ -166,28 +346,16 @@ extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
  * Releases the lock without marking the object as initialised.  This function
  * is called if initialising a static causes an exception to be thrown.
  */
-extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
+extern "C" void __cxa_guard_abort(Guard *guard_object)
 {
-	__attribute__((unused))
-	bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
-	    LOCKED, INITIAL);
-	assert(reset);
+	guard_object->unlock(false);
 }
+
 /**
  * Releases the guard and marks the object as initialised.  This function is
  * called after successful initialisation of a static.
  */
-extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
+extern "C" void __cxa_guard_release(Guard *guard_object)
 {
-	guard_lock_t old;
-	if (INIT_PART(guard_object) == LOCK_PART(guard_object))
-		old = LOCKED;
-	else
-		old = INITIAL;
-	__attribute__((unused))
-	bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
-	    old, INITIALISED);
-	assert(reset);
-	if (INIT_PART(guard_object) != LOCK_PART(guard_object))
-		*LOCK_PART(guard_object) = INITIAL;
+	guard_object->unlock(true);
 }
diff --git a/contrib/libcxxrt/libelftc_dem_gnu3.c b/contrib/libcxxrt/libelftc_dem_gnu3.c
index 93e1c41fa034..e75d1694562e 100644
--- a/contrib/libcxxrt/libelftc_dem_gnu3.c
+++ b/contrib/libcxxrt/libelftc_dem_gnu3.c
@@ -1116,7 +1116,7 @@ cpp_demangle_read_array(struct cpp_demangle_data *ddata)
 		if (!cpp_demangle_read_type(ddata, NULL))
 			return (0);
 
-		if (!DEM_PUSH_STR(ddata, "[]"))
+		if (!DEM_PUSH_STR(ddata, " []"))
 			return (0);
 	} else {
 		if (ELFTC_ISDIGIT(*ddata->cur) != 0) {
@@ -1131,7 +1131,7 @@ cpp_demangle_read_array(struct cpp_demangle_data *ddata)
 				return (0);
 			if (!cpp_demangle_read_type(ddata, NULL))
 				return (0);
-			if (!DEM_PUSH_STR(ddata, "["))
+			if (!DEM_PUSH_STR(ddata, " ["))
 				return (0);
 			if (!cpp_demangle_push_str(ddata, num, num_len))
 				return (0);
@@ -1163,7 +1163,7 @@ cpp_demangle_read_array(struct cpp_demangle_data *ddata)
 				free(exp);
 				return (0);
 			}
-			if (!DEM_PUSH_STR(ddata, "[")) {
+			if (!DEM_PUSH_STR(ddata, " [")) {
 				free(exp);
 				return (0);
 			}
diff --git a/contrib/libcxxrt/memory.cc b/contrib/libcxxrt/memory.cc
index 5f1aad76961f..7beb048ae914 100644
--- a/contrib/libcxxrt/memory.cc
+++ b/contrib/libcxxrt/memory.cc
@@ -51,7 +51,7 @@ typedef void (*new_handler)();
  * The function to call when allocation fails.  By default, there is no
  * handler and a bad allocation exception is thrown if an allocation fails.
  */
-static new_handler new_handl;
+static atomic<new_handler> new_handl{nullptr};
 
 namespace std
 {
@@ -61,12 +61,13 @@ namespace std
 	__attribute__((weak))
 	new_handler set_new_handler(new_handler handler)
 	{
-		return ATOMIC_SWAP(&new_handl, handler);
+		return new_handl.exchange(handler);
 	}
+
 	__attribute__((weak))
 	new_handler get_new_handler(void)
 	{
-		return ATOMIC_LOAD(&new_handl);
+		return new_handl.load();
 	}
 }
 
@@ -79,6 +80,32 @@ namespace std
 #define BADALLOC
 #endif
 
+namespace
+{
+	/**
+	 * Helper for forwarding from no-throw operators to versions that can
+	 * return nullptr.  Catches any exception and converts it into a nullptr
+	 * return.
+	 */
+	template<void*(New)(size_t)>
+	void *noexcept_new(size_t size)
+	{
+#if !defined(_CXXRT_NO_EXCEPTIONS)
+	try
+	{
+		return New(size);
+	} catch (...)
+	{
+		// nothrow operator new should return NULL in case of
+		// std::bad_alloc exception in new handler
+		return nullptr;
+	}
+#else
+	return New(size);
+#endif
+	}
+}
+
 
 __attribute__((weak))
 void* operator new(size_t size) BADALLOC
@@ -97,7 +124,11 @@ void* operator new(size_t size) BADALLOC
 		}
 		else
 		{
+#if !defined(_CXXRT_NO_EXCEPTIONS)
 			throw std::bad_alloc();
+#else
+			break;
+#endif
 		}
 		mem = malloc(size);
 	}
@@ -105,16 +136,11 @@ void* operator new(size_t size) BADALLOC
 	return mem;
 }
 
+
 __attribute__((weak))
 void* operator new(size_t size, const std::nothrow_t &) NOEXCEPT
 {
-	try {
-		return :: operator new(size);
-	} catch (...) {
-		// nothrow operator new should return NULL in case of
-		// std::bad_alloc exception in new handler
-		return NULL;
-	}
+	return noexcept_new<(::operator new)>(size);
 }
 
 
@@ -135,13 +161,7 @@ void * operator new[](size_t size) BADALLOC
 __attribute__((weak))
 void * operator new[](size_t size, const std::nothrow_t &) NOEXCEPT
 {
-	try {
-		return ::operator new[](size);
-	} catch (...) {
-		// nothrow operator new should return NULL in case of
-		// std::bad_alloc exception in new handler
-		return NULL;
-	}
+	return noexcept_new<(::operator new[])>(size);
 }
 
 
diff --git a/contrib/libcxxrt/noexception.cc b/contrib/libcxxrt/noexception.cc
new file mode 100644
index 000000000000..25dac1279684
--- /dev/null
+++ b/contrib/libcxxrt/noexception.cc
@@ -0,0 +1,45 @@
+/* 
+ * Copyright 2021 Microsoft. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+namespace std
+{
+	/**
+	 * Returns whether there are any exceptions currently being thrown that
+	 * have not been caught. Without exception support this is always false.
+	 */
+	bool uncaught_exception() throw()
+	{
+		return false;
+	}
+	/**
+	 * Returns the number of exceptions currently being thrown that have not
+	 * been caught. Without exception support this is always 0.
+	 */
+	int uncaught_exceptions() throw()
+	{
+		return 0;
+	}
+}