PERFORCE change 168109 for review

Stanislav Sedov stas at FreeBSD.org
Thu Sep 3 15:46:39 UTC 2009


http://perforce.freebsd.org/chv.cgi?CH=168109

Change 168109 by stas at stas_yandex on 2009/09/03 15:46:07

	- Fix x86 support (I have not checked signals yet, though).
	- Rework the elf loader algorithm to match what rtld does:
	         map a chunk of anonymous memory on top of entire object's
	         address space and then map sections above it.

Affected files ...

.. //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-common.c#8 edit
.. //depot/projects/valgrind/coregrind/m_syswrap/syscall-x86-freebsd.S#9 edit
.. //depot/projects/valgrind/coregrind/m_ume/elf.c#3 edit
.. //depot/projects/valgrind/coregrind/pub_core_aspacemgr.h#4 edit

Differences ...

==== //depot/projects/valgrind/coregrind/m_aspacemgr/aspacemgr-common.c#8 (text+ko) ====

@@ -176,8 +176,8 @@
 #  elif defined(VGP_x86_freebsd)
    if (flags & VKI_MAP_ANONYMOUS && fd == 0)
       fd = -1;
-   res = VG_(do_syscall8)(__NR_mmap, (UWord)start, length,
-			  prot, flags, fd, 0, offset, offset >> 32ul);
+   res = VG_(do_syscall7)(__NR_mmap, (UWord)start, length,
+			  prot, flags, fd, offset, offset >> 32ul);
 #  elif defined(VGP_amd64_freebsd)
    if (flags & VKI_MAP_ANONYMOUS && fd == 0)
       fd = -1;
@@ -189,8 +189,7 @@
    return res;
 }
 
-static
-SysRes local_do_mprotect_NO_NOTIFY(Addr start, SizeT length, UInt prot)
+SysRes VG_(am_do_mprotect_NO_NOTIFY)(Addr start, SizeT length, UInt prot)
 {
    return VG_(do_syscall3)(__NR_mprotect, (UWord)start, length, prot );
 }
@@ -438,7 +437,7 @@
    aspacem_assert(VG_IS_PAGE_ALIGNED(stack));
 
    /* Protect the guard areas. */
-   sres = local_do_mprotect_NO_NOTIFY( 
+   sres = VG_(am_do_mprotect_NO_NOTIFY)( 
              (Addr) &stack[0], 
              VG_STACK_GUARD_SZB, VKI_PROT_NONE 
           );
@@ -448,7 +447,7 @@
       VG_STACK_GUARD_SZB, VKI_PROT_NONE 
    );
 
-   sres = local_do_mprotect_NO_NOTIFY( 
+   sres = VG_(am_do_mprotect_NO_NOTIFY)( 
              (Addr) &stack->bytes[VG_STACK_GUARD_SZB + VG_STACK_ACTIVE_SZB], 
              VG_STACK_GUARD_SZB, VKI_PROT_NONE 
           );

==== //depot/projects/valgrind/coregrind/m_syswrap/syscall-x86-freebsd.S#9 (text+ko) ====

@@ -1,13 +1,13 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Support for doing system calls.          syscall-x86-linux.S ---*/
+/*--- Support for doing system calls.        syscall-x86-freebsd.S ---*/
 /*--------------------------------------------------------------------*/
 
 /*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.
 
-  Copyright (C) 2000-2008 Julian Seward 
+  Copyright (C) 2000-2007 Julian Seward 
      jseward at acm.org
 
   This program is free software; you can redistribute it and/or
@@ -53,10 +53,9 @@
 	back to regs->m_eax on completion.
 	
 	Returns 0 if the syscall was successfully called (even if the
-	syscall itself failed), or a nonzero error code in the lowest
-        8 bits if one of the sigprocmasks failed (there's no way to
-        determine which one failed).  And there's no obvious way to
-        recover from that either, but nevertheless we want to know.
+	syscall itself failed), or a -ve error code if one of the
+	sigprocmasks failed (there's no way to determine which one
+	failed).
 
 	VG_(fixup_guest_state_after_syscall_interrupted) does the
 	thread state fixup in the case where we were interrupted by a
@@ -64,153 +63,135 @@
 	
 	Prototype:
 
-	UWord ML_(do_syscall_for_client_WRK)(
-	                          Int syscallno,		// 0
-				  void* guest_state,		// 4
-				  const vki_sigset_t *sysmask,	// 8
-				  const vki_sigset_t *postmask,	// 12
-				  Int nsigwords,		// 16
-				  SyscallArgs *args)		// 20
+	Int ML_(do_syscall_for_client_WRK)(
+	                          Int syscallno,		// ebp+8
+				  void* guest_state,		// ebp+12
+				  const vki_sigset_t *sysmask,	// ebp+16
+				  const vki_sigset_t *postmask,	// ebp+20
+				  Int sigsetSzB)		// ebp+24
 
-See priv_types_n_macros.h for SyscallArgs layout:
-      UWord sysno;		// 0
-      UWord arg1;		// 4
-      UWord arg2;		// 8
-      UWord arg3;		// 12
-      UWord arg4;		// 16
-      UWord arg5;		// 20
-      UWord arg6;		// 24
-      UWord arg7;		// 28
-      UWord arg8;		// 32
-				   
+        Note that sigsetSzB is totally ignored (and irrelevant).
 */
 
-/* from vki_arch.h */	
+/* from vki-darwin.h, checked at startup by m_vki.c */	
 #define VKI_SIG_SETMASK	3
-	
-/* QQQ translate syscall abi conventions */
+
 .globl ML_(do_syscall_for_client_WRK)
 ML_(do_syscall_for_client_WRK):
-	/* save callee-saved regs */
-	push	%esi
-	push	%edi
-	push	%ebx
+	/* establish stack frame */
 	push	%ebp
-	subl	$(9*4), %esp		/* space for 8 args plus fake ret addr */
-#define FSZ	((9+4+1)*4)	/* 4 args + ret addr */
-
-1:	/* Even though we can't take a signal until the sigprocmask completes,
-	   start the range early.
+	mov	%esp, %ebp
+	subl	$8, %esp	/* 16-byte align stack */
+	
+1:	/* Even though we can't take a signal until the
+           sigprocmask completes, start the range early.
 	   If eip is in the range [1,2), the syscall hasn't been started yet */
 
 	/* Set the signal mask which should be current during the syscall. */
-	movl	$__NR_sigprocmask, %eax
-	movl	$VKI_SIG_SETMASK, %ecx
-	movl	%ecx, 4(%esp)
-	movl	8+FSZ(%esp), %ecx
-	movl	%ecx, 8(%esp)
-	movl	12+FSZ(%esp), %ecx
-	movl	%ecx, 12(%esp)
-	int	$0x80
-	jb	7f	/* sigprocmask failed */
-	
-	/* We have already collapsed the stupid FreeBSD/i386 indirect syscalls */
-	movl     20+FSZ(%esp), %ebx
-	movl	 0+FSZ(%esp), %eax	/* use syscallno argument rather than thread EAX */
-	/* copy 8 args */
-	movl     4(%ebx), %ecx
-	movl     8(%ebx), %edx
-	movl     12(%ebx), %esi
-	movl     16(%ebx), %edi
-	movl     %ecx, 4(%esp)
-	movl     %edx, 8(%esp)
-	movl     %esi, 12(%esp)
-	movl     %edi, 16(%esp)
-	movl     20(%ebx), %ecx
-	movl     24(%ebx), %edx
-	movl     28(%ebx), %esi
-	movl     32(%ebx), %edi
-	movl     %ecx, 20(%esp)
-	movl     %edx, 24(%esp)
-	movl     %esi, 28(%esp)
-	movl     %edi, 32(%esp)
+        pushl   20(%ebp)
+        pushl   16(%ebp)
+        pushl   $VKI_SIG_SETMASK
+        pushl   $0xcafebabe    /* totally fake return address */
+        movl    $__NR_sigprocmask, %eax
+        int     $0x80
+        jc      7f  /* sigprocmask failed */
+        addl    $16,%esp
+
+	/* Copy syscall parameters to the stack - assume no more than 8 
+	 * plus the return address */
+	/* do_syscall8 */
+	/* stack is currently aligned assuming 8 parameters */
+	movl	12(%ebp), %edx			
+	movl	OFFSET_x86_ESP(%edx), %edx	/* edx = simulated ESP */
+	movl	28+4(%edx), %eax
+	pushl	%eax
+	movl	24+4(%edx), %eax
+	pushl	%eax
+	movl	20+4(%edx), %eax
+	pushl	%eax
+	movl	16+4(%edx), %eax
+	pushl	%eax
+	movl	12+4(%edx), %eax
+	pushl	%eax
+	movl	8+4(%edx), %eax
+	pushl	%eax
+	movl	4+4(%edx), %eax
+	pushl	%eax
+	movl	0+4(%edx), %eax
+	pushl	%eax
+	/* return address */
+	movl	0(%edx), %eax
+	pushl	%eax
+
+	/* Put syscall number in eax */
+	movl	8(%ebp), %eax
 
 	/* If eip==2, then the syscall was either just about to start, 
 	   or was interrupted and the kernel was restarting it. */
-2:	int	$0x80
-3:	/* In the range [3, 4), the syscall result is in %eax/%edx/eflags,
-	   but hasn't been committed to EAX/EDX. */
-	pushf
-	popl	%edi				/* copy flags to %edi */
-	movl	4+FSZ(%esp), %ebx
-	movl	%eax, OFFSET_x86_EAX(%ebx)	/* save back to EAX */
-	movl	%edx, OFFSET_x86_EDX(%ebx)	/* save back to EDX */
-	/* QQQ Race here.  We aren't really "committed" until we have called
-	   LibVEX_GuestX86_put_eflag_c() to store the carry bit.  Unfortunately
-	   we cannot test for %eip in that range. It might be that the syscall
-	   result extraction code needs to be widened from ending at
-	   blksys_complete to blksys_finished instead.  Note that on FreeBSD,
-	   %edi is saved across a syscall so we don't have to worry about
-	   it getting trashed by the sigprocmask below. */
+2:	int	$0x80		/* UNIX (GrP fixme should be sysenter?) */
 
-4:	/* Re-block signals.  If eip is in [4,5), then the syscall is complete and 
-	   we needn't worry about it. */
-	/* QQQ: However, on FreeBSD, the trap handler has to export just carry */
-	movl	$__NR_sigprocmask, %eax
-	movl	$VKI_SIG_SETMASK, %ecx
+3:	/* In the range [3, 4), the syscall result is in %eax and %edx and C,
+           but hasn't been committed to the thread state. */
+	setc	0(%esp)				/* stash returned carry flag */
+	movl	12(%ebp), %ecx
+	movl	%eax, OFFSET_x86_EAX(%ecx)	/* save EAX to vex */
+	movl	%edx, OFFSET_x86_EDX(%ecx)	/* save EDX to vex */
+	/* save carry flag to vex */
+	subl	$12, %esp
 	movl	%ecx, 4(%esp)
-	movl	12+FSZ(%esp), %ecx
-	movl	%ecx, 8(%esp)
-	xorl	%ecx, %ecx
-	movl	%ecx, 12(%esp)
-	int	$0x80
-	jb	7f	/* sigprocmask failed */
+	movl	$0, 0(%esp)
+	movb	12(%esp), %al
+	movb	%al, 0(%esp)
+	call	LibVEX_GuestX86_put_eflag_c
+	addl	$12, %esp
+
+4:	/* Re-block signals.  If eip is in [4,5), then the syscall is
+           complete and we needn't worry about it. */
+        /* Set up for __pthread_sigmask(SIG_SETMASK, postmask, NULL) */
+        pushl   $0
+        pushl   20(%ebp)
+        pushl   $VKI_SIG_SETMASK
+        pushl   $0xcafef00d    /* totally fake return address */
+        movl    $__NR_sigprocmask, %eax
+        int     $0x80  /* should be sysenter? */
+        jc      7f  /* sigprocmask failed */
+        addl    $16,%esp
 
 5:	/* now safe from signals */
-
-	/* Export carry state */
-	movl	%edi, %eax
-	andl	$1, %eax	/* SUCCESS */
-	addl	$(9*4), %esp	/* args + fake return address */
+	movl	$0, %eax       /* SUCCESS */
+	movl	%ebp, %esp
 	popl	%ebp
-	popl	%ebx
-	popl	%edi
-	popl	%esi
 	ret
 
-7:	/* failure: return 0x8000 | error code */
-	orl	$0x8000, %eax
-	addl	$(9*4), %esp	/* args + fake return address */
+7:      /* failure: return 0x8000 | error code */
+        /* Note that we enter here with %esp being 16 too low
+           (4 extra words on the stack).  But because we're nuking
+           the stack frame now, that doesn't matter. */
+        andl    $0x7FFF, %eax
+        orl     $0x8000, %eax
+	movl	%ebp, %esp
 	popl	%ebp
-	popl	%ebx
-	popl	%edi
-	popl	%esi
 	ret
-#undef FSZ
 
-	
 .section .rodata
 /* export the ranges so that
    VG_(fixup_guest_state_after_syscall_interrupted) can do the
    right thing */
-	
+        
 .globl ML_(blksys_setup)
 .globl ML_(blksys_restart)
 .globl ML_(blksys_complete)
 .globl ML_(blksys_committed)
 .globl ML_(blksys_finished)
-ML_(blksys_setup):	.long 1b
-ML_(blksys_restart):	.long 2b
-ML_(blksys_complete):	.long 3b
-ML_(blksys_committed):	.long 4b
-ML_(blksys_finished):	.long 5b
+ML_(blksys_setup):      .long 1b
+ML_(blksys_restart):    .long 2b
+ML_(blksys_complete):   .long 3b
+ML_(blksys_committed):  .long 4b
+ML_(blksys_finished):   .long 5b
 .previous
-	
-/* Let the linker know we don't need an executable stack */
-.section .note.GNU-stack,"",@progbits
-
-#endif /* defined(VGP_x86_freebsd) */
-
+        
+#endif // defined(VGP_x86_freebsd)
+ 
 /*--------------------------------------------------------------------*/
 /*--- end                                                          ---*/
 /*--------------------------------------------------------------------*/

==== //depot/projects/valgrind/coregrind/m_ume/elf.c#3 (text+ko) ====

@@ -41,6 +41,7 @@
 #include "pub_core_libcfile.h"      // VG_(open) et al
 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
+#include "pub_core_vkiscnums.h"
 #include "pub_core_syscall.h"       // VG_(strerror)
 #include "pub_core_ume.h"           // self
 
@@ -160,22 +161,38 @@
    Int    i;
    SysRes res;
    ESZ(Addr) elfbrk = 0;
+   ESZ(Word) mapsize;
+   ESZ(Addr) base_offset, base_vaddr, base_vlimit, baseaddr;
+   ESZ(Phdr) *ph0, *phlast;
+
+   if (e->e.e_phnum < 1) {
+      VG_(printf)("valgrind: m_ume.c: too few sections\n");
+      return 1;
+   }
 
-   for (i = 0; i < e->e.e_phnum; i++) {
+   /* Map the entire address space of the object. */
+   ph0 = NULL;
+   phlast = NULL;
+   for (i = 1; i < e->e.e_phnum; i++) {
       ESZ(Phdr) *ph = &e->p[i];
-      ESZ(Addr) addr, brkaddr;
-      ESZ(Word) memsz;
-
       if (ph->p_type != PT_LOAD)
          continue;
-
-      addr    = ph->p_vaddr+base;
-      memsz   = ph->p_memsz;
-      brkaddr = addr+memsz;
-
-      if (brkaddr > elfbrk)
-         elfbrk = brkaddr;
+      if (ph0 == NULL)
+         ph0 = ph;
+      if (phlast == NULL || ph->p_vaddr > phlast->p_vaddr)
+         phlast = &e->p[i];
+   }
+   if (ph0 == NULL || phlast == NULL) {
+      VG_(printf)("valgrind: m_ume.c: too few loadable sections\n");
+      return 1;
    }
+   base_offset = VG_PGROUNDDN(ph0->p_offset);
+   base_vaddr = VG_PGROUNDDN(ph0->p_vaddr);
+   base_vlimit = VG_PGROUNDUP(phlast->p_vaddr + phlast->p_memsz);
+   mapsize = base_vlimit - base_vaddr;
+   baseaddr = VG_PGROUNDDN(ph0->p_vaddr + base);
+   res = VG_(am_mmap_anon_fixed_client)(baseaddr, mapsize, VKI_PROT_NONE);
+   check_mmap(res, baseaddr, mapsize);
 
    for (i = 0; i < e->e.e_phnum; i++) {
       ESZ(Phdr) *ph = &e->p[i];
@@ -199,12 +216,9 @@
       memsz   = ph->p_memsz;
       brkaddr = addr+memsz;
 
-      // Tom says: In the following, do what the Linux kernel does and only
-      // map the pages that are required instead of rounding everything to
-      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
-      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
-      //
-      // The condition handles the case of a zero-length segment.
+      if (brkaddr > elfbrk)
+         elfbrk = brkaddr;
+
       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
          res = VG_(am_mmap_file_fixed_client)(
@@ -222,23 +236,38 @@
       if (memsz > filesz) {
          UInt bytes;
 
-         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
+         bytes = VG_PGROUNDUP(bss) - bss;
          if (bytes > 0) {
-            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
-            res = VG_(am_mmap_anon_fixed_client)(
-                     VG_PGROUNDUP(bss), bytes,
-                     prot
-                  );
-            if (0) VG_(am_show_nsegments)(0,"after #2");
-            check_mmap(res, VG_PGROUNDUP(bss), bytes);
+            /* Make sure the end of the segment is writable */
+            if ((prot & VKI_PROT_WRITE) == 0) {
+               res = VG_(am_do_mprotect_NO_NOTIFY)((UWord)VG_PGROUNDDN(bss), VKI_PAGE_SIZE,
+                  prot|VKI_PROT_WRITE);
+               if (sr_isError(res)) {
+                  VG_(printf)("valgrind: m_ume.c: mprotect failed\n");
+                  return (1);
+               }
+            }
+            VG_(memset)((char *)bss, 0, bytes);
+            /* Reset the data protection back */
+            if ((prot & VKI_PROT_WRITE) == 0) {
+               res = VG_(am_do_mprotect_NO_NOTIFY)((UWord)VG_PGROUNDDN(bss), VKI_PAGE_SIZE,
+                  prot);
+               if (sr_isError(res)) {
+                  VG_(printf)("valgrind: m_ume.c: mprotect failed\n");
+                  return (1);
+               }
+            }
          }
 
-         bytes = bss & (VKI_PAGE_SIZE - 1);
-
-         // The 'prot' condition allows for a read-only bss
-         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
-            bytes = VKI_PAGE_SIZE - bytes;
-            VG_(memset)((char *)bss, 0, bytes);
+         /* Overlay the BSS segment onto the proper region. */
+         if (VG_PGROUNDUP(brkaddr) > VG_PGROUNDUP(bss)) {
+            res = VG_(am_do_mprotect_NO_NOTIFY)((UWord)VG_PGROUNDUP(bss),
+               VG_PGROUNDUP(brkaddr) - VG_PGROUNDUP(bss), prot);
+            if (sr_isError(res)) {
+               VG_(printf)("valgrind: m_ume.c: mprotect failed\n");
+               return (1);
+            }
+            VG_(am_notify_mprotect)((Addr)VG_PGROUNDUP(bss), VG_PGROUNDUP(brkaddr) - VG_PGROUNDUP(bss), prot);
          }
       }
    }

==== //depot/projects/valgrind/coregrind/pub_core_aspacemgr.h#4 (text+ko) ====

@@ -164,6 +164,9 @@
    specified address range. */
 extern Bool VG_(am_notify_client_shmat)( Addr a, SizeT len, UInt prot );
 
+extern SysRes VG_(am_do_mprotect_NO_NOTIFY)
+   ( Addr start, SizeT length, UInt prot);
+
 /* Notifies aspacem that an mprotect was completed successfully.  The
    segment array is updated accordingly.  Note, as with
    VG_(am_notify_munmap), it is not the job of this function to reject
@@ -191,7 +194,6 @@
 extern SysRes VG_(am_do_mmap_NO_NOTIFY)
    ( Addr start, SizeT length, UInt prot, UInt flags, Int fd, Off64T offset);
 
-
 //--------------------------------------------------------------
 // Functions pertaining to AIX5-specific notifications.
 


More information about the p4-projects mailing list