kern/123095 kern/131602 sendfile

Kostik Belousov kostikbel at gmail.com
Thu Jul 8 08:22:04 UTC 2010


On Thu, Jul 08, 2010 at 10:51:21AM +0300, Andriy Gapon wrote:
> 
> Not an expert by any measure but the following looks suspicious:
> m_copy/m_copym calls mb_dupcl for M_EXT case and M_RDONLY is _not_ checked nor
> preserved in that case.
> So we may get a writable M_EXT mbuf pointing to sf_buf wrapping a page of a file.
> But I am not sure if/how mbufs are re-used and if we can end up actually writing
> something to the resulting mbuf.
> 

My first catch was m_cat. With m_cat() changed as in the patch below, I
only get added KASSERT in sbcompress() fired sometimes. Before, it
reliably paniced on each run.

diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 6f86895..530f704 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -810,6 +810,12 @@ sf_buf_alloc(struct vm_page *m, int flags)
 				nsfbufsused++;
 				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
 			}
+			if ((flags & SFB_RONLY) == 0) {
+				ptep = vtopte(sf->kva);
+				opte = *ptep;
+				if ((opte & PG_RW) == 0)
+					*ptep |= PG_RW | PG_A;
+			}
 #ifdef SMP
 			goto shootdown;	
 #else
@@ -854,8 +860,9 @@ sf_buf_alloc(struct vm_page *m, int flags)
        PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
 	   | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
 #else
-	*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
-	    pmap_cache_bits(m->md.pat_mode, 0);
+       *ptep = VM_PAGE_TO_PHYS(m) | pgeflag |
+	   ((flags & SFB_RONLY) ? 0 : PG_RW) | PG_V |
+	   pmap_cache_bits(m->md.pat_mode, 0);
 #endif
 
 	/*
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index f41eb03..2af543d 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -911,7 +911,8 @@ m_cat(struct mbuf *m, struct mbuf *n)
 		m = m->m_next;
 	while (n) {
 		if (m->m_flags & M_EXT ||
-		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN] ||
+		    !M_WRITABLE(m)) {
 			/* just join the two chains */
 			m->m_next = n;
 			return;
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index 2060a2e..95fae7e 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -776,6 +776,8 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
 		    n->m_type == m->m_type) {
+			KASSERT(n->m_ext.ext_type != EXT_SFBUF,
+			    ("destroying file page %p", n));
 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
 			    (unsigned)m->m_len);
 			n->m_len += m->m_len;
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 3165dab..df9cd69 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -2131,7 +2131,8 @@ retry_space:
 			 * as necessary, but this wait can be interrupted.
 			 */
 			if ((sf = sf_buf_alloc(pg,
-			    (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
+			    (mnw ? SFB_NOWAIT : SFB_CATCH) | SFB_RONLY))
+			    == NULL) {
 				mbstat.sf_allocfail++;
 				vm_page_lock(pg);
 				vm_page_unwire(pg, 0);
@@ -2162,6 +2163,8 @@ retry_space:
 				m_cat(m, m0);
 			else
 				m = m0;
+			KASSERT((m0->m_flags & M_RDONLY) != 0,
+			    ("lost M_RDONLY"));
 
 			/* Keep track of bits processed. */
 			loopbytes += xfsize;
diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h
index af42065..fcb31f8 100644
--- a/sys/sys/sf_buf.h
+++ b/sys/sys/sf_buf.h
@@ -41,6 +41,7 @@
 #define	SFB_CPUPRIVATE	2		/* Create a CPU private mapping. */
 #define	SFB_DEFAULT	0
 #define	SFB_NOWAIT	4		/* Return NULL if all bufs are used. */
+#define	SFB_RONLY	8		/* Map page read-only, if possible. */
 
 struct vm_page;
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-net/attachments/20100708/4dc2ea28/attachment.pgp


More information about the freebsd-net mailing list