PERFORCE change 168553 for review
Gleb Kurtsou
gk at FreeBSD.org
Mon Sep 14 23:07:42 UTC 2009
http://perforce.freebsd.org/chv.cgi?CH=168553
Change 168553 by gk at gk_h1 on 2009/09/14 23:07:07
implement mapped read and write
Affected files ...
.. //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 edit
Differences ...
==== //depot/projects/soc2009/gk_pefs/sys/fs/pefs/pefs_vnops.c#14 (text+ko) ====
@@ -51,18 +51,23 @@
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
+#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/limits.h>
-
-#include <fs/pefs/pefs.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
+#include <fs/pefs/pefs.h>
+
static int pefs_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, pefs_bug_bypass, CTLFLAG_RW,
&pefs_bug_bypass, 0, "");
@@ -631,32 +636,49 @@
}
static int
-pefs_vreg_grow(struct vnode *vp, u_quad_t nsize, struct ucred *cred)
+pefs_tryextend(struct vnode *vp, u_quad_t nsize, struct ucred *cred)
{
struct vnode *lvp = PEFS_LOWERVP(vp);
- struct vattr o_va;
+ struct vattr va;
struct uio *puio;
struct pefs_node *pn = VP_TO_PN(vp);
struct pefs_chunk pc;
struct pefs_ctx *ctx;
+ u_quad_t osize;
off_t offset;
size_t bsize, size;
int error;
- error = VOP_GETATTR(lvp, &o_va, cred);
+ MPASS(vp->v_type == VREG);
+
+ error = VOP_GETATTR(lvp, &va, cred);
if (error)
return (error);
+ osize = va.va_size;
- PEFSDEBUG("pefs_vreg_grow: old size %jd, new size %jd\n",
- nsize, o_va.va_size);
- if (nsize <= o_va.va_size)
+ if (nsize <= osize)
return (0);
- if (nsize - o_va.va_size >= INT_MAX)
- return (EINVAL);
- size = nsize - o_va.va_size;
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+ vn_lock(vp, LK_UPGRADE | LK_RETRY);
+ error = VOP_GETATTR(lvp, &va, cred);
+ if (error)
+ return (error);
+ osize = va.va_size;
+ if (nsize <= osize)
+ return (0);
+ }
+
+ PEFSDEBUG("pefs_tryextend: old size 0x%jx, new size 0x%jx\n", osize, nsize);
+
+ VATTR_NULL(&va);
+ va.va_size = nsize;
+ VOP_SETATTR(lvp, &va, cred);
+ vnode_pager_setsize(vp, nsize);
+
+ size = nsize - osize;
bsize = qmin(size, DFLTPHYS);
- offset = o_va.va_size;
+ offset = osize;
pefs_chunk_create(&pc, pn, bsize);
ctx = pefs_ctx_get();
@@ -665,14 +687,13 @@
pefs_chunk_zero(&pc);
pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc);
puio = pefs_chunk_uio(&pc, offset, UIO_WRITE);
- PEFSDEBUG("pefs_vreg_grow: resizing file; filling with zeros: offset=%jd, resid=%jd\n", offset, bsize);
+ PEFSDEBUG("pefs_tryextend: resizing file; filling with zeros: offset=0x%jx, resid=0x%jx\n", offset, bsize);
error = VOP_WRITE(lvp, puio, 0, cred);
if (error) {
/* try to reset */
- size = o_va.va_size;
- VATTR_NULL(&o_va);
- o_va.va_size = size;
- VOP_SETATTR(lvp, &o_va, cred);
+ VATTR_NULL(&va);
+ va.va_size = osize;
+ VOP_SETATTR(lvp, &va, cred);
break;
}
offset += bsize;
@@ -695,7 +716,6 @@
pefs_setattr(struct vop_setattr_args *ap)
{
struct vnode *vp = ap->a_vp;
- struct vnode *lvp;
struct ucred *cred = ap->a_cred;
struct vattr *vap = ap->a_vap;
int error;
@@ -723,22 +743,23 @@
* Disallow write attempts if the filesystem is
* mounted read-only.
*/
- if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ if ((vp->v_mount->mnt_flag & MNT_RDONLY) ||
+ pefs_no_keys(vp))
return (EROFS);
if (vp->v_type == VREG)
- error = pefs_vreg_grow(vp, vap->va_size, cred);
+ error = pefs_tryextend(vp, vap->va_size, cred);
else
- error = EOPNOTSUPP; // TODO pefs_vlnk_chsize
+ error = EOPNOTSUPP; /* TODO */
if (error)
return (error);
+ vnode_pager_setsize(vp, vap->va_size);
break;
default:
return (EOPNOTSUPP);
}
}
- lvp = PEFS_LOWERVP(vp);
- return (VOP_SETATTR(lvp, vap, cred));
+ return (VOP_SETATTR(PEFS_LOWERVP(vp), vap, cred));
}
/*
@@ -1041,6 +1062,15 @@
pefs_node_buf_free(pn);
VI_UNLOCK(vp);
+ if (vp->v_object != NULL) {
+ if (vp->v_object->resident_page_count > 0)
+ PEFSDEBUG("pefs_inactive: vobject has dirty pages: vp=%p count=%d\n",
+ vp, vp->v_object->resident_page_count);
+ VM_OBJECT_LOCK(vp->v_object);
+ vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_UNLOCK(vp->v_object);
+ }
+
if ((pn->pn_flags & PN_WANTRECYCLE) || (pn->pn_flags & PN_HASKEY) == 0)
vrecycle(vp, td);
@@ -1206,7 +1236,6 @@
if (error)
break;
- /* Nothing was written.. somehow */
if (pc.pc_size == puio->uio_resid)
break;
pefs_chunk_setsize(&pc, pc.pc_size - puio->uio_resid);
@@ -1536,6 +1565,33 @@
return (error);
}
+static inline int
+pefs_getsize(struct vnode *vp, u_quad_t *sizep, struct ucred *cred)
+{
+ struct vattr va;
+ int error;
+ /* Fetch attributes from the lower vnode; only va_size is consumed. */
+ error = VOP_GETATTR(PEFS_LOWERVP(vp), &va, cred);
+ if (error == 0)
+ *sizep = va.va_size;
+ /* Returns the VOP_GETATTR error; *sizep is written only on success. */
+ return (error);
+}
+
+static inline int
+pefs_ismapped(struct vnode *vp)
+{
+ vm_object_t object = vp->v_object;
+ /* Returns 0 when the vnode has no VM object. */
+ if (object == NULL)
+ return (0);
+ /* Otherwise 1 if the object has resident pages or a non-NULL cache or root, else 0. */
+ if (object->resident_page_count > 0 || object->cache != NULL ||
+ object->root != NULL)
+ return (1);
+ return (0);
+}
+
static int
pefs_read(struct vop_read_args *ap)
{
@@ -1543,16 +1599,21 @@
struct vnode *lvp = PEFS_LOWERVP(vp);
struct uio *uio = ap->a_uio;
struct uio *puio;
+ struct ucred *cred = ap->a_cred;
struct pefs_node *pn = VP_TO_PN(vp);
struct pefs_chunk pc;
struct pefs_ctx *ctx;
- ssize_t bsize, done;
- int error = 0;
+ vm_page_t m;
+ vm_offset_t moffset;
+ u_quad_t fsize;
+ ssize_t bsize, msize, done;
+ int ioflag = ap->a_ioflag;
+ int error = 0, mapped, restart_decrypt;
if (vp->v_type == VDIR)
return (EISDIR);
if (!(pn->pn_flags & PN_HASKEY) || vp->v_type == VFIFO)
- return (VOP_READ(lvp, uio, ap->a_ioflag, ap->a_cred));
+ return (VOP_READ(lvp, uio, ioflag, cred));
if (vp->v_type != VREG)
return (EOPNOTSUPP);
if (uio->uio_resid == 0)
@@ -1560,40 +1621,72 @@
if (uio->uio_offset < 0)
return (EINVAL);
- bsize = qmin(uio->uio_resid, DFLTPHYS);
+ mapped = pefs_ismapped(vp);
+ bsize = qmin(uio->uio_resid, mapped ? PAGE_SIZE : DFLTPHYS);
+ error = pefs_getsize(vp, &fsize, cred);
+ if (error != 0)
+ return (error);
ctx = pefs_ctx_get();
- pefs_data_decrypt_start(ctx, &pn->pn_tkey, uio->uio_offset);
pefs_chunk_create(&pc, pn, bsize);
- while (uio->uio_resid > 0) {
+ restart_decrypt = 1;
+ while (uio->uio_resid > 0 && uio->uio_offset < fsize) {
+ bsize = qmin(uio->uio_resid, bsize);
+ bsize = qmin(fsize - uio->uio_offset, bsize);
+ pefs_chunk_setsize(&pc, bsize);
+
+ if (mapped) {
+ moffset = uio->uio_offset & PAGE_MASK;
+ msize = qmin(PAGE_SIZE - moffset, bsize);
+
+ VM_OBJECT_LOCK(vp->v_object);
+lookupvpg:
+ m = vm_page_lookup(vp->v_object,
+ OFF_TO_IDX(uio->uio_offset));
+ if (m != NULL && vm_page_is_valid(m, moffset, msize)) {
+ if (vm_page_sleep_if_busy(m, FALSE, "pefsmr"))
+ goto lookupvpg;
+ vm_page_busy(m);
+ VM_OBJECT_UNLOCK(vp->v_object);
+ PEFSDEBUG("pefs_read: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n",
+ uio->uio_offset, moffset, msize);
+ error = uiomove_fromphys(&m, moffset, msize, uio);
+ VM_OBJECT_LOCK(vp->v_object);
+ vm_page_wakeup(m);
+ VM_OBJECT_UNLOCK(vp->v_object);
+ if (error != 0)
+ break;
+ restart_decrypt = 1;
+ continue;
+ } else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
+ /* FIXME: UIO_NOCOPY is not supported */
+ VM_OBJECT_UNLOCK(vp->v_object);
+ return (EIO);
+ }
+ VM_OBJECT_UNLOCK(vp->v_object);
+ /* Page not cached. Make next read page-aligned. */
+ pefs_chunk_setsize(&pc, msize);
+ }
+
+ PEFSDEBUG("pefs_read: mapped=%d m=%d offset=0x%jx size=0x%jx\n",
+ mapped, m != NULL, uio->uio_offset, pc.pc_size);
puio = pefs_chunk_uio(&pc, uio->uio_offset, uio->uio_rw);
- error = VOP_READ(lvp, puio, ap->a_ioflag, ap->a_cred);
- if (error != 0) {
+ error = VOP_READ(lvp, puio, ioflag, cred);
+ if (error != 0)
break;
- }
done = pc.pc_size - puio->uio_resid;
-#if 0
- error = VOP_GETATTR(lvp, &va, ap->a_cred);
- if (error != 0) {
- pefs_chunk_free(&pc, pn);
- return (error);
- }
- if (va.va_size < uio->uio_offset) {
- /* Read past end of file */
- done -= uio->uio_offset - va.va_size;
- MPASS(done >= 0);
- }
-#endif
if (done <= 0)
break;
pefs_chunk_setsize(&pc, done);
+ if (restart_decrypt) {
+ restart_decrypt = 0;
+ pefs_data_decrypt_start(ctx, &pn->pn_tkey,
+ uio->uio_offset);
+ }
pefs_data_decrypt_update(ctx, &pn->pn_tkey, &pc);
pefs_chunk_copy(&pc, uio);
-
- bsize = qmin(uio->uio_resid, bsize);
- pefs_chunk_setsize(&pc, bsize);
}
pefs_ctx_free(ctx);
pefs_chunk_free(&pc, pn);
@@ -1606,19 +1699,28 @@
{
struct vnode *vp = ap->a_vp;
struct vnode *lvp = PEFS_LOWERVP(vp);
+ struct ucred *cred = ap->a_cred;
struct uio *uio = ap->a_uio;
struct uio *puio;
+ struct sf_buf *sf;
struct pefs_node *pn = VP_TO_PN(vp);
struct pefs_chunk pc;
struct pefs_ctx *ctx;
+ vm_page_t m = NULL;
+ vm_offset_t moffset;
+ vm_pindex_t idx;
+ u_quad_t nsize;
+ char *ma;
off_t offset;
- ssize_t resid, bsize;
- int error = 0;
+ ssize_t resid, bsize, msize;
+ int ioflag = ap->a_ioflag;
+ int restart_encrypt;
+ int error = 0, mapped;
if (vp->v_type == VDIR)
return (EISDIR);
if (vp->v_type == VFIFO)
- return (error = VOP_WRITE(lvp, uio, ap->a_ioflag, ap->a_cred));
+ return (VOP_WRITE(lvp, uio, ioflag, cred));
if (vp->v_type != VREG)
return (EOPNOTSUPP);
if (uio->uio_resid == 0)
@@ -1629,36 +1731,117 @@
if (!(pn->pn_flags & PN_HASKEY))
return (EROFS);
+ error = pefs_getsize(vp, &nsize, cred);
+ if (error != 0)
+ return (error);
+
+ if (ioflag & IO_APPEND) {
+ uio->uio_offset = nsize;
+ ioflag &= ~IO_APPEND;
+ }
+
offset = uio->uio_offset;
resid = uio->uio_resid;
- error = pefs_vreg_grow(vp, offset, ap->a_cred);
- if (error != 0)
- return (error);
+ if (offset > nsize) {
+ error = pefs_tryextend(vp, offset, cred);
+ if (error != 0)
+ return (error);
+ }
+
+ mapped = pefs_ismapped(vp);
+ bsize = qmin(resid, mapped ? PAGE_SIZE : DFLTPHYS);
+
+ if (offset + resid > nsize) {
+ PEFSDEBUG("pefs_write: extend: 0x%jx (old size: 0x%jx)\n", offset + resid, nsize);
+ nsize = offset + resid;
+ vnode_pager_setsize(vp, nsize);
+ }
- bsize = qmin(resid, DFLTPHYS);
ctx = pefs_ctx_get();
- pefs_data_encrypt_start(ctx, &pn->pn_tkey, uio->uio_offset);
- pefs_chunk_create(&pc, pn, bsize);
+ restart_encrypt = 1;
+ pefs_chunk_create(&pc, pn, mapped ? PAGE_SIZE : bsize);
while (resid > 0) {
+ bsize = qmin(resid, bsize);
+ if (mapped) {
+ moffset = offset & PAGE_MASK;
+ msize = qmin(PAGE_SIZE - moffset, bsize);
+ msize = qmin(nsize - offset, msize);
+ pefs_chunk_setsize(&pc, moffset + msize);
+
+ VM_OBJECT_LOCK(vp->v_object);
+lookupvpg:
+ idx = OFF_TO_IDX(offset);
+ m = vm_page_lookup(vp->v_object, idx);
+ if (m != NULL && vm_page_is_valid(m, 0, moffset + msize)) {
+ if (vm_page_sleep_if_busy(m, FALSE, "pefsmw"))
+ goto lookupvpg;
+ vm_page_busy(m);
+ vm_page_lock_queues();
+ vm_page_undirty(m);
+ vm_page_unlock_queues();
+ VM_OBJECT_UNLOCK(vp->v_object);
+ PEFSDEBUG("pefs_write: mapped: offset=0x%jx moffset=0x%jx msize=0x%jx\n",
+ offset, moffset, msize);
+ sched_pin();
+ sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
+ ma = (char *)sf_buf_kva(sf);
+ error = uiomove(ma + moffset, msize, uio);
+ memcpy(pc.pc_base, ma, pc.pc_size);
+ sf_buf_free(sf);
+ sched_unpin();
+ VM_OBJECT_LOCK(vp->v_object);
+ vm_page_wakeup(m);
+ VM_OBJECT_UNLOCK(vp->v_object);
+ if (error != 0) {
+ break;
+ }
+ if (moffset != 0) {
+ resid += moffset;
+ offset -= moffset;
+ restart_encrypt = 1;
+ }
+ goto lower_update;
+ } else if (__predict_false(vp->v_object->cache != NULL)) {
+ PEFSDEBUG("pefs_write: free cache: 0x%jx\n", offset - moffset);
+ vm_page_cache_free(vp->v_object, idx,
+ idx + 1);
+ }
+ MPASS(m == NULL ||
+ !vm_page_is_valid(m, moffset, msize));
+ VM_OBJECT_UNLOCK(vp->v_object);
+ /* Page align subsequent writes */
+ pefs_chunk_setsize(&pc, msize);
+ } else {
+ pefs_chunk_setsize(&pc, bsize);
+ }
pefs_chunk_copy(&pc, uio);
+lower_update:
+ PEFSDEBUG("pefs_write: mapped=%d m=%d offset=0x%jx size=0x%jx\n",
+ mapped, m != NULL, offset, pc.pc_size);
+ if (restart_encrypt) {
+ restart_encrypt = 0;
+ pefs_data_encrypt_start(ctx, &pn->pn_tkey, offset);
+ }
pefs_data_encrypt_update(ctx, &pn->pn_tkey, &pc);
puio = pefs_chunk_uio(&pc, offset, uio->uio_rw);
- error = VOP_WRITE(lvp, puio, ap->a_ioflag, ap->a_cred);
+ /* IO_APPEND handled above to prevent offset change races. */
+ error = VOP_WRITE(lvp, puio, ioflag, cred);
if (error != 0)
break;
MPASS(puio->uio_resid == 0);
- resid -= bsize;
- offset += bsize;
+ resid -= pc.pc_size;
+ offset += pc.pc_size;
- bsize = qmin(resid, bsize);
- pefs_chunk_setsize(&pc, bsize);
}
pefs_ctx_free(ctx);
pefs_chunk_free(&pc, pn);
+ MPASS(resid == uio->uio_resid);
+ MPASS(offset == uio->uio_offset);
+
return (error);
}
More information about the p4-projects
mailing list