Infinite loop bug in libc_r on 4.x with condition variables a
nd signals
Julian Elischer
julian at elischer.org
Thu Oct 28 16:00:25 PDT 2004
David Xu wrote:
> Here is the cvs log:
>
> Revision Changes Path
> 1.58 +1 -0 src/lib/libpthread/thread/thr_create.c
> 1.14 +1 -1 src/lib/libpthread/thread/thr_find_thread.c
> 1.115 +27 -10 src/lib/libpthread/thread/thr_kern.c
> 1.119 +15 -11 src/lib/libpthread/thread/thr_private.h
> 1.81 +1 -2 src/lib/libpthread/thread/thr_sig.c
commit message was:
1. Move thread list flags into new separate member, and atomically
put DEAD thread on GC list, this closes a race between pthread_join
and thr_cleanup.
2. Introduce a mutex to protect tcb initialization, tls allocation and
deallocation code in rtld seems no lock protection or it is broken,
under stress testing, memory is corrupted.
translates to:
> julian at julian:cvs diff -u
> cvs server: Diffing .
> Index: thr_create.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_create.c,v
> retrieving revision 1.57
> diff -u -r1.57 thr_create.c
> --- thr_create.c 12 Aug 2004 12:12:12 -0000 1.57
> +++ thr_create.c 28 Oct 2004 22:55:58 -0000
> @@ -234,6 +234,7 @@
> new_thread->specific_data_count = 0;
> new_thread->cleanup = NULL;
> new_thread->flags = 0;
> + new_thread->tlflags = 0;
> new_thread->continuation = NULL;
> new_thread->wakeup_time.tv_sec = -1;
> new_thread->lock_switch = 0;
> Index: thr_find_thread.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_find_thread.c,v
> retrieving revision 1.13
> diff -u -r1.13 thr_find_thread.c
> --- thr_find_thread.c 17 Jul 2003 23:02:30 -0000 1.13
> +++ thr_find_thread.c 28 Oct 2004 22:55:58 -0000
> @@ -90,7 +90,7 @@
> if (curthread != NULL)
> curthread->critical_count--;
> if ((thread->refcount == 0) &&
> - (thread->flags & THR_FLAGS_GC_SAFE) != 0)
> + (thread->tlflags & TLFLAGS_GC_SAFE) != 0)
> THR_GCLIST_ADD(thread);
> KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
> _kse_critical_leave(crit);
> Index: thr_kern.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_kern.c,v
> retrieving revision 1.112
> diff -u -r1.112 thr_kern.c
> --- thr_kern.c 15 Aug 2004 16:28:05 -0000 1.112
> +++ thr_kern.c 28 Oct 2004 22:55:58 -0000
> @@ -139,6 +139,9 @@
> static struct thread_hash_head thr_hashtable[THREAD_HASH_QUEUES];
> #define THREAD_HASH(thrd) ((unsigned long)thrd %
> THREAD_HASH_QUEUE
> S)
>
> +/* Lock for thread tcb constructor/destructor */
> +static pthread_mutex_t _tcb_mutex;
> +
> #ifdef DEBUG_THREAD_KERN
> static void dump_queues(struct kse *curkse);
> #endif
> @@ -166,7 +169,7 @@
> struct pthread_sigframe *psf);
> static int thr_timedout(struct pthread *thread, struct timespec
> *curtime);
> static void thr_unlink(struct pthread *thread);
> -static void thr_destroy(struct pthread *thread);
> +static void thr_destroy(struct pthread *curthread, struct pthread
> *thread);
> static void thread_gc(struct pthread *thread);
> static void kse_gc(struct pthread *thread);
> static void kseg_gc(struct pthread *thread);
> @@ -240,7 +243,7 @@
> _thr_stack_free(&thread->attr);
> if (thread->specific != NULL)
> free(thread->specific);
> - thr_destroy(thread);
> + thr_destroy(curthread, thread);
> }
> }
>
> @@ -285,14 +288,14 @@
> /* Free the free threads. */
> while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) {
> TAILQ_REMOVE(&free_threadq, thread, tle);
> - thr_destroy(thread);
> + thr_destroy(curthread, thread);
> }
> free_thread_count = 0;
>
> /* Free the to-be-gc'd threads. */
> while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) {
> TAILQ_REMOVE(&_thread_gc_list, thread, gcle);
> - thr_destroy(thread);
> + thr_destroy(curthread, thread);
> }
> TAILQ_INIT(&gc_ksegq);
> _gc_count = 0;
> @@ -381,6 +384,7 @@
> if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE,
> _kse_lock_wait, _kse_lock_wakeup) != 0)
> PANIC("Unable to initialize thread list lock");
> + _pthread_mutex_init(&_tcb_mutex, NULL);
> active_kse_count = 0;
> active_kseg_count = 0;
> _gc_count = 0;
> @@ -1204,7 +1208,6 @@
> thread->kseg = _kse_initial->k_kseg;
> thread->kse = _kse_initial;
> }
> - thread->flags |= THR_FLAGS_GC_SAFE;
>
> /*
> * We can't hold the thread list lock while holding the
> @@ -1213,6 +1216,7 @@
> KSE_SCHED_UNLOCK(curkse, curkse->k_kseg);
> DBG_MSG("Adding thread %p to GC list\n", thread);
> KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
> + thread->tlflags |= TLFLAGS_GC_SAFE;
> THR_GCLIST_ADD(thread);
> KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
> if (sys_scope) {
> @@ -1252,7 +1256,7 @@
> /* Check the threads waiting for GC. */
> for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td =
> td_next) {
> td_next = TAILQ_NEXT(td, gcle);
> - if ((td->flags & THR_FLAGS_GC_SAFE) == 0)
> + if ((td->tlflags & TLFLAGS_GC_SAFE) == 0)
> continue;
> else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
> ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) ==
> 0)) {
> @@ -2382,7 +2386,14 @@
> if ((thread == NULL) &&
> ((thread = malloc(sizeof(struct pthread))) != NULL)) {
> bzero(thread, sizeof(struct pthread));
> - if ((thread->tcb = _tcb_ctor(thread, curthread ==
> NULL)) == NULL
> ) {
> + if (curthread) {
> + _pthread_mutex_lock(&_tcb_mutex);
> + thread->tcb = _tcb_ctor(thread, 0 /* not
> initial tls */)
> ;
> + _pthread_mutex_unlock(&_tcb_mutex);
> + } else {
> + thread->tcb = _tcb_ctor(thread, 1 /* initial
> tls */);
> + }
> + if (thread->tcb == NULL) {
> free(thread);
> thread = NULL;
> } else {
> @@ -2418,7 +2429,7 @@
> thread->name = NULL;
> }
> if ((curthread == NULL) || (free_thread_count >=
> MAX_CACHED_THREADS)) {
> - thr_destroy(thread);
> + thr_destroy(curthread, thread);
> } else {
> /* Add the thread to the free thread list. */
> crit = _kse_critical_enter();
> @@ -2431,14 +2442,20 @@
> }
>
> static void
> -thr_destroy(struct pthread *thread)
> +thr_destroy(struct pthread *curthread, struct pthread *thread)
> {
> int i;
>
> for (i = 0; i < MAX_THR_LOCKLEVEL; i++)
> _lockuser_destroy(&thread->lockusers[i]);
> _lock_destroy(&thread->lock);
> - _tcb_dtor(thread->tcb);
> + if (curthread) {
> + _pthread_mutex_lock(&_tcb_mutex);
> + _tcb_dtor(thread->tcb);
> + _pthread_mutex_unlock(&_tcb_mutex);
> + } else {
> + _tcb_dtor(thread->tcb);
> + }
> free(thread->siginfo);
> free(thread);
> }
> Index: thr_private.h
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_private.h,v
> retrieving revision 1.118
> diff -u -r1.118 thr_private.h
> --- thr_private.h 7 Aug 2004 15:15:38 -0000 1.118
> +++ thr_private.h 28 Oct 2004 22:55:59 -0000
> @@ -753,9 +753,13 @@
> #define THR_FLAGS_IN_RUNQ 0x0004 /* in run queue using pqe link */
> #define THR_FLAGS_EXITING 0x0008 /* thread is exiting */
> #define THR_FLAGS_SUSPENDED 0x0010 /* thread is suspended */
> -#define THR_FLAGS_GC_SAFE 0x0020 /* thread safe for
> cleaning */
> -#define THR_FLAGS_IN_TDLIST 0x0040 /* thread in all
> thread list */
> -#define THR_FLAGS_IN_GCLIST 0x0080 /* thread in gc list */
> +
> + /* Thread list flags; only set with thread list lock held. */
> +#define TLFLAGS_GC_SAFE 0x0001 /* thread safe for
> cleaning */
> +#define TLFLAGS_IN_TDLIST 0x0002 /* thread in all
> thread list */
> +#define TLFLAGS_IN_GCLIST 0x0004 /* thread in gc list */
> + int tlflags;
> +
> /*
> * Base priority is the user setable and retrievable priority
> * of the thread. It is only affected by explicit calls to
> @@ -897,30 +901,30 @@
> * the gc list.
> */
> #define THR_LIST_ADD(thrd) do { \
> - if (((thrd)->flags & THR_FLAGS_IN_TDLIST) == 0) { \
> + if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) == 0) { \
> TAILQ_INSERT_HEAD(&_thread_list, thrd, tle); \
> _thr_hash_add(thrd); \
> - (thrd)->flags |= THR_FLAGS_IN_TDLIST; \
> + (thrd)->tlflags |= TLFLAGS_IN_TDLIST; \
> } \
> } while (0)
> #define THR_LIST_REMOVE(thrd) do { \
> - if (((thrd)->flags & THR_FLAGS_IN_TDLIST) != 0) { \
> + if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) != 0) { \
> TAILQ_REMOVE(&_thread_list, thrd, tle); \
> _thr_hash_remove(thrd); \
> - (thrd)->flags &= ~THR_FLAGS_IN_TDLIST; \
> + (thrd)->tlflags &= ~TLFLAGS_IN_TDLIST; \
> } \
> } while (0)
> #define THR_GCLIST_ADD(thrd) do { \
> - if (((thrd)->flags & THR_FLAGS_IN_GCLIST) == 0) { \
> + if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) == 0) { \
> TAILQ_INSERT_HEAD(&_thread_gc_list, thrd, gcle);\
> - (thrd)->flags |= THR_FLAGS_IN_GCLIST; \
> + (thrd)->tlflags |= TLFLAGS_IN_GCLIST; \
> _gc_count++; \
> } \
> } while (0)
> #define THR_GCLIST_REMOVE(thrd) do { \
> - if (((thrd)->flags & THR_FLAGS_IN_GCLIST) != 0) { \
> + if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) != 0) { \
> TAILQ_REMOVE(&_thread_gc_list, thrd, gcle); \
> - (thrd)->flags &= ~THR_FLAGS_IN_GCLIST; \
> + (thrd)->tlflags &= ~TLFLAGS_IN_GCLIST; \
> _gc_count--; \
> } \
> } while (0)
> Index: thr_sig.c
> ===================================================================
> RCS file: /home/ncvs/src/lib/libpthread/thread/thr_sig.c,v
> retrieving revision 1.79
> diff -u -r1.79 thr_sig.c
> --- thr_sig.c 13 Jul 2004 22:52:11 -0000 1.79
> +++ thr_sig.c 28 Oct 2004 22:55:59 -0000
> @@ -1195,8 +1195,7 @@
> thr_sigframe_save(struct pthread *thread, struct pthread_sigframe *psf)
> {
> /* This has to initialize all members of the sigframe. */
> - psf->psf_flags =
> - thread->flags & (THR_FLAGS_PRIVATE | THR_FLAGS_IN_TDLIST);
> + psf->psf_flags = thread->flags & THR_FLAGS_PRIVATE;
> psf->psf_interrupted = thread->interrupted;
> psf->psf_timeout = thread->timeout;
> psf->psf_state = thread->state;
> julian at julian:
>
> Julian Elischer wrote:
>
>> David, do you have revision numbers of what needs to be MFC'd?
>>
>>
>> David Xu wrote:
>>
>>
>>> Daniel Eischen wrote:
>>>
>>>
>>>>> FWIW, we are having (I think) the same problem on 5.3 with
>>>>> libpthread. The
>>>>>
>>>>> panic there is in the mutex code about an assertion failing
>>>>> because a thread
>>>>> is on a syncq when it is not supposed to be.
>>>>>
>>>>
>>>>
>>>>
>>>> David and I recently fixed some races in pthread_join() and
>>>> pthread_exit() in -current libpthread. Don't know if those
>>>> were responsible...
>>>>
>>>>
>>>>
>>>
>>> That fix should be MFCed ASAP.
>>>
>>>
>>>> Here's a test program that shows correct behavior with both
>>>> libc_r and libpthread in -current.
>>>>
More information about the freebsd-threads
mailing list