minidumps are unsafe on amd64

Ruslan Ermilov ru at FreeBSD.org
Fri Jan 25 10:29:40 PST 2008


Hi,

Kernel minidumps on amd64 SMP can write beyond the bounds
of the configured dump device causing (as in our case) the
file system data following swap partition to be overwritten
with the dump contents.

The problem is that while we're in the process of dumping
mapped physical pages via a bitmap (in minidump_machdep.c),
other CPUs continue to work and may modify page mappings of
processes.  This in turn causes the modifications to
pv_entries, which in turn modifies the bitmap of pages to
dump.  As the result, we can dump more pages than we've
calculated, and since dumps are written to the end of the
dump device, we may end up overwriting it.

The attached patch mitigates the problem, but the real solution
seems to be to disable interrupts (there's an XXX about this
in kern_shutdown.c before calling doadump()), and stopping
other CPUs, so we don't modify page tables while we're dumping.

This only affects 7.x/8.x amd64 SMP systems configured with
minidump.  i386 systems aren't affected.


Cheers,
-- 
Ruslan Ermilov
ru at FreeBSD.org
FreeBSD committer
-------------- next part --------------
Index: minidump_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/amd64/amd64/minidump_machdep.c,v
retrieving revision 1.2
diff -u -p -r1.2 minidump_machdep.c
--- minidump_machdep.c	5 Dec 2006 11:31:33 -0000	1.2
+++ minidump_machdep.c	25 Jan 2008 17:47:49 -0000
@@ -60,6 +60,7 @@ int vm_page_dump_size;
 
 static struct kerneldumpheader kdh;
 static off_t dumplo;
+static int zz = 0;
 
 /* Handle chunked writes. */
 static size_t fragsz;
@@ -68,6 +69,9 @@ static size_t counter, progress;
 
 CTASSERT(sizeof(*vm_page_dump) == 8);
 
+static void _dump_add_page(vm_paddr_t pa);
+static void _dump_drop_page(vm_paddr_t pa);
+
 static int
 is_dumpable(vm_paddr_t pa)
 {
@@ -239,6 +243,7 @@ minidumpsys(struct dumperinfo *di)
 	}
 
 	/* Calculate dump size. */
+	zz = 1;
 	dumpsize = ptesize;
 	dumpsize += round_page(msgbufp->msg_size);
 	dumpsize += round_page(vm_page_dump_size);
@@ -251,7 +256,7 @@ minidumpsys(struct dumperinfo *di)
 			if (is_dumpable(pa)) {
 				dumpsize += PAGE_SIZE;
 			} else {
-				dump_drop_page(pa);
+				_dump_drop_page(pa);
 			}
 			bits &= ~(1ul << bit);
 		}
@@ -383,9 +388,11 @@ minidumpsys(struct dumperinfo *di)
 	/* Signal completion, signoff and exit stage left. */
 	di->dumper(di->priv, NULL, 0, 0, 0);
 	printf("\nDump complete\n");
+	zz = 0;
 	return;
 
  fail:
+	zz = 0;
 	if (error < 0)
 		error = -error;
 
@@ -397,8 +404,8 @@ minidumpsys(struct dumperinfo *di)
 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
 }
 
-void
-dump_add_page(vm_paddr_t pa)
+static void
+_dump_add_page(vm_paddr_t pa)
 {
 	int idx, bit;
 
@@ -409,7 +416,20 @@ dump_add_page(vm_paddr_t pa)
 }
 
 void
-dump_drop_page(vm_paddr_t pa)
+dump_add_page(vm_paddr_t pa)
+{
+
+	if (zz) {
+		printf("delaying %s\n", __func__);
+		while (zz)
+			DELAY(1000000);
+		printf("resuming %s\n", __func__);
+	}
+	_dump_add_page(pa);
+}
+
+static void
+_dump_drop_page(vm_paddr_t pa)
 {
 	int idx, bit;
 
@@ -418,3 +438,16 @@ dump_drop_page(vm_paddr_t pa)
 	bit = pa & 63;
 	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
 }
+
+void
+dump_drop_page(vm_paddr_t pa)
+{
+
+	if (zz) {
+		printf("delaying %s\n", __func__);
+		while (zz)
+			DELAY(1000000);
+		printf("resuming %s\n", __func__);
+	}
+	_dump_drop_page(pa);
+}


More information about the freebsd-current mailing list