bin/95339: [patch] rtld is thread-unsafe. fixes for dlopen mt behavior

Kostik Belousov kostikbel at gmail.com
Wed Apr 5 07:50:15 UTC 2006


>Number:         95339
>Category:       bin
>Synopsis:       [patch] rtld is thread-unsafe. fixes for dlopen mt behavior
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Wed Apr 05 07:50:13 GMT 2006
>Closed-Date:
>Last-Modified:
>Originator:     Kostik Belousov
>Release:        7-CURRENT, 6-STABLE
>Organization:
-
>Environment:
-
>Description:
dlopen/dlclose rtld operations are not thread safe. Dynamic loader can
be easily segfaulted or trapped into assertion with parallel invocations
of rtld operations from multiple threads. Test case and fix for problems
exposed by the test are attached.
>How-To-Repeat:
Use the following test by Kazuaki Oda <kaakun at highway dot ne dot jp>
to see errant behaviour:

#include <err.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NTHREADS	10

void *func(void *dummy);

int main(void)
{
    pthread_t tids[NTHREADS];
    int error;
    int i;

    for (i = 0; i < NTHREADS; i++) {
	error = pthread_create(&tids[i], NULL, func, NULL);
	if (error)
	    errc(1, error, "pthread_create");
    }

    for (;;)
	sleep(1);

    /* NOTREACHED */

    exit(0);
}

void *func(void *dummy)
{
    void *h;
    unsigned long c = 0;

    for (;;) {
	    if ((h = dlopen("/usr/lib/libm.so", RTLD_NOW)) == NULL) {
		    fprintf(stderr, "dlopen: %s\n", dlerror());
		    continue;
	    }
	    if (dlclose(h) == -1)
		    fprintf(stderr, "dlclose: %s\n", dlerror());
	    if (c++ == 10000) {
		    printf(".\n");
		    c = 0;
	    }
    }

    /* NOTREACHED */

    return (NULL);
}

>Fix:
Index: libexec/rtld-elf/rtld.c
===================================================================
RCS file: /usr/local/arch/ncvs/src/libexec/rtld-elf/rtld.c,v
retrieving revision 1.112
diff -u -r1.112 rtld.c
--- libexec/rtld-elf/rtld.c	24 Dec 2005 15:37:30 -0000	1.112
+++ libexec/rtld-elf/rtld.c	23 Mar 2006 10:44:27 -0000
@@ -105,7 +105,7 @@
 static int load_preload_objects(void);
 static Obj_Entry *load_object(const char *, const Obj_Entry *);
 static Obj_Entry *obj_from_addr(const void *);
-static void objlist_call_fini(Objlist *);
+static void objlist_call_fini(Objlist *, int *lockstate, unsigned long *gen);
 static void objlist_call_init(Objlist *);
 static void objlist_clear(Objlist *);
 static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *);
@@ -165,6 +165,7 @@
   STAILQ_HEAD_INITIALIZER(list_main);
 static Objlist list_fini =	/* Objects needing fini() calls */
   STAILQ_HEAD_INITIALIZER(list_fini);
+static unsigned long list_fini_gen = 0;
 
 static Elf_Sym sym_zero;	/* For resolving undefined weak refs. */
 
@@ -1168,8 +1169,10 @@
 	objlist_push_tail(list, obj);
 
     /* Add the object to the global fini list in the reverse order. */
-    if (obj->fini != (Elf_Addr)NULL)
+    if (obj->fini != (Elf_Addr)NULL) {
 	objlist_push_head(&list_fini, obj);
+	list_fini_gen++;
+    }
 }
 
 #ifndef FPTR_TARGET
@@ -1362,21 +1365,30 @@
  * non-NULL fini functions.
  */
 static void
-objlist_call_fini(Objlist *list)
+objlist_call_fini(Objlist *list, int *lockstate, unsigned long *gen)
 {
-    Objlist_Entry *elm;
+    Objlist_Entry *elm, *elm_tmp;
     char *saved_msg;
+    unsigned long saved_gen = *gen;
 
     /*
      * Preserve the current error message since a fini function might
      * call into the dynamic linker and overwrite it.
      */
     saved_msg = errmsg_save();
-    STAILQ_FOREACH(elm, list, link) {
+ again:
+    STAILQ_FOREACH_SAFE(elm, list, link, elm_tmp) {
 	if (elm->obj->refcount == 0) {
 	    dbg("calling fini function for %s at %p", elm->obj->path,
 	        (void *)elm->obj->fini);
+	    elm->obj->fini_now = true;
+	    wlock_release(rtld_bind_lock, *lockstate);
 	    call_initfini_pointer(elm->obj, elm->obj->fini);
+	    *lockstate = wlock_acquire(rtld_bind_lock);
+	    if (*gen != saved_gen) {
+		    saved_gen = *gen;
+		    goto again;
+	    }
 	}
     }
     errmsg_restore(saved_msg);
@@ -1390,7 +1402,7 @@
 static void
 objlist_call_init(Objlist *list)
 {
-    Objlist_Entry *elm;
+    Objlist_Entry *elm, *elm_tmp;
     char *saved_msg;
 
     /*
@@ -1398,7 +1410,7 @@
      * call into the dynamic linker and overwrite it.
      */
     saved_msg = errmsg_save();
-    STAILQ_FOREACH(elm, list, link) {
+    STAILQ_FOREACH_SAFE(elm, list, link, elm_tmp) {
 	dbg("calling init function for %s at %p", elm->obj->path,
 	    (void *)elm->obj->init);
 	call_initfini_pointer(elm->obj, elm->obj->init);
@@ -1563,15 +1575,18 @@
 rtld_exit(void)
 {
     Obj_Entry *obj;
+    int lockstate;
 
     dbg("rtld_exit()");
     /* Clear all the reference counts so the fini functions will be called. */
+    lockstate = wlock_acquire(rtld_bind_lock);
     for (obj = obj_list;  obj != NULL;  obj = obj->next)
 	obj->refcount = 0;
-    objlist_call_fini(&list_fini);
+    objlist_call_fini(&list_fini, &lockstate, &list_fini_gen);
     /* No need to remove the items from the list, since we are exiting. */
     if (!libmap_disable)
         lm_fini();
+    wlock_release(rtld_bind_lock, lockstate);
 }
 
 static void *
@@ -1685,10 +1700,10 @@
 	 * The object is no longer referenced, so we must unload it.
 	 * First, call the fini functions with no locks held.
 	 */
-	wlock_release(rtld_bind_lock, lockstate);
-	objlist_call_fini(&list_fini);
-	lockstate = wlock_acquire(rtld_bind_lock);
+	objlist_call_fini(&list_fini, &lockstate, &list_fini_gen);
+
 	objlist_remove_unref(&list_fini);
+	list_fini_gen++;
 
 	/* Finish cleaning up the newly-unreferenced objects. */
 	GDB_STATE(RT_DELETE,&root->linkmap);
@@ -1741,9 +1756,9 @@
     if (ld_tracing != NULL)
 	environ = (char **)*get_program_var_addr("environ");
 
+    lockstate = wlock_acquire(rtld_bind_lock);
     objlist_init(&initlist);
 
-    lockstate = wlock_acquire(rtld_bind_lock);
     GDB_STATE(RT_ADD,NULL);
 
     old_obj_tail = obj_tail;
@@ -1755,7 +1770,10 @@
 	obj = load_object(name, obj_main);
     }
 
-    if (obj) {
+    if (obj && obj->fini_now) {
+	obj = NULL;
+	_rtld_error("%s is running finalizers now", name);
+    } else if (obj) {
 	obj->dl_refcount++;
 	if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL)
 	    objlist_push_tail(&list_global, obj);
Index: libexec/rtld-elf/rtld.h
===================================================================
RCS file: /usr/local/arch/ncvs/src/libexec/rtld-elf/rtld.h,v
retrieving revision 1.37
diff -u -r1.37 rtld.h
--- libexec/rtld-elf/rtld.h	18 Dec 2005 19:43:32 -0000	1.37
+++ libexec/rtld-elf/rtld.h	23 Mar 2006 10:44:27 -0000
@@ -210,6 +210,7 @@
     bool jmpslots_done;		/* Already have relocated the jump slots */
     bool init_done;		/* Already have added object to init list */
     bool tls_done;		/* Already allocated offset for static TLS */
+    bool fini_now;		/* Finalizer for dso currently runs */
 
     struct link_map linkmap;	/* for GDB and dlinfo() */
     Objlist dldags;		/* Object belongs to these dlopened DAGs (%) */

Please note that patch forces dlopen for dso to fail if another
thread run finalizers from the same dso at the time of the call.

joerg at britannica dot bec dot de does not like the patch and
promised to supply better fix, but still not available.
>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list