[patch] mmap() MAP_TEXT implementation (to use for shared libraries)

Svatopluk Kraus onwahe at gmail.com
Mon Sep 3 10:35:09 UTC 2012


Hi,

  I found out that while running executables and the dynamic linker
are protected against writing (ETXTBSY), loaded shared libraries
are not protected. The libraries are mapped by mmap() in the dynamic
linker (rtld), and there is no way to set the VV_TEXT flag on the
libraries' vnodes in the mmap() code.

  In the Linux compatibility code (compat/linux/linux_misc.c), linux_uselib()
sets the VV_TEXT flag on a library vnode. In Solaris, a MAP_TEXT flag
exists which informs mmap() that the mapped region will be used
primarily for executing instructions (for better MMU utilization).
With these in mind, I propose implementing a MAP_TEXT option in mmap()
and, in case the underlying object is a vnode, setting the VV_TEXT flag on it.

  I have already implemented it, and with the rtld map_object() patch it
works fine for me (of course). The rtld patch looks easy; however, I'm
not sure about the mmap patch.

  After some investigation, it looks like VV_TEXT, once set on a vnode,
remains set until the last reference to the vnode is dropped. So, to be
consistent, I don't bother clearing VV_TEXT in munmap(). The
executables and the dynamic linker are activated in the kernel, so VV_TEXT is
set before activation and cleared if something fails. Shared library
activation is done in the dynamic linker (i.e., in userland). It's done in
steps, and mmapping the library is one of them. So, I think that
VV_TEXT can be set in mmap() just after everything has finished
successfully.

  The patch itself is implemented in vm_mmap_vnode(). If I want to set
the VV_TEXT flag on a vnode, I need an exclusive lock. In the current code,
the exclusive lock type is (mis)used as the condition for the
vnode_pager_update_writecount() call. (I hope that I didn't miss
something.) So, the patch is slightly bigger: it introduces a separate
writeable_shared variable for that decision.

  I defined the MAP_TEXT flag in the extended flags section. However, I
feel it is related to the MAP_STACK flag, but I'm not sure if and when
reserved flags (in the other flags section) can be re-used.

       Svata


  Index: libexec/rtld-elf/map_object.c
===================================================================
--- libexec/rtld-elf/map_object.c	(revision 239770)
+++ libexec/rtld-elf/map_object.c	(working copy)
@@ -199,7 +199,8 @@
 	data_prot = convert_prot(segs[i]->p_flags);
 	data_flags = convert_flags(segs[i]->p_flags) | MAP_FIXED;
 	if (mmap(data_addr, data_vlimit - data_vaddr, data_prot,
-	  data_flags | MAP_PREFAULT_READ, fd, data_offset) == (caddr_t) -1) {
+	  data_flags | MAP_PREFAULT_READ | MAP_TEXT, fd, data_offset) ==
+	    (caddr_t) -1) {
 	    _rtld_error("%s: mmap of data failed: %s", path,
 		rtld_strerror(errno));
 	    goto error1;
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c	(revision 239770)
+++ sys/vm/vm_mmap.c	(working copy)
@@ -1258,10 +1258,13 @@
 	struct mount *mp;
 	struct ucred *cred;
 	int error, flags, locktype, vfslocked;
+	int writeable_shared;

 	mp = vp->v_mount;
 	cred = td->td_ucred;
-	if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
+	flags = *flagsp;
+	writeable_shared = ((*maxprotp & VM_PROT_WRITE) && (flags & MAP_SHARED));
+	if (writeable_shared || ((flags & MAP_TEXT) != 0))
 		locktype = LK_EXCLUSIVE;
 	else
 		locktype = LK_SHARED;
@@ -1271,7 +1274,6 @@
 		return (error);
 	}
 	foff = *foffp;
-	flags = *flagsp;
 	obj = vp->v_object;
 	if (vp->v_type == VREG) {
 		/*
@@ -1294,7 +1296,7 @@
 				return (error);
 			}
 		}
-		if (locktype == LK_EXCLUSIVE) {
+		if (writeable_shared) {
 			*writecounted = TRUE;
 			vnode_pager_update_writecount(obj, 0, objsize);
 		}
@@ -1337,6 +1339,14 @@
 		error = ENOMEM;
 		goto done;
 	}
+	/*
+	 * If MAP_TEXT is announced, set VV_TEXT so no one can write
+	 * to the executable.
+	 */
+	if ((flags & MAP_TEXT) != 0) {
+		ASSERT_VOP_ELOCKED(vp, "vv_text");
+		vp->v_vflag |= VV_TEXT;
+	}
 	*objp = obj;
 	*flagsp = flags;

Index: sys/sys/mman.h
===================================================================
--- sys/sys/mman.h	(revision 239770)
+++ sys/sys/mman.h	(working copy)
@@ -91,6 +91,7 @@
  */
 #define	MAP_NOCORE	 0x00020000 /* dont include these pages in a coredump */
 #define	MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+#define	MAP_TEXT	 0x00080000 /* map code segment */
 #endif /* __BSD_VISIBLE */

 #if __POSIX_VISIBLE >= 199309


More information about the freebsd-current mailing list