svn commit: r298230 - in head: lib/libstand sys/boot/common sys/boot/efi/libefi sys/boot/efi/loader sys/boot/i386/libfirewire sys/boot/i386/libi386 sys/boot/i386/loader sys/boot/mips/beri/loader sy...

Allan Jude allanjude at FreeBSD.org
Mon Apr 18 23:09:24 UTC 2016


Author: allanjude
Date: Mon Apr 18 23:09:22 2016
New Revision: 298230
URL: https://svnweb.freebsd.org/changeset/base/298230

Log:
  A new implementation of the loader block cache
  
  The block cache implementation in the loader has proven to be almost useless, and in the worst case it even slows down disk reads due to insufficient cache size and an extra memory copy.
  Also, the current cache implementation does not cache reads from CDs, nor does it work with zfs built on top of multiple disks.
  Instead of an LRU, this code uses a simple hash (O(1) read from cache), and instead of a single global cache, a separate cache per block device.
  The cache also implements limited read-ahead to increase performance.
  To simplify read-ahead management, the read-ahead will not wrap past the end of the bcache, so in the worst case a single-block physical read will be performed to fill the last block in the bcache.
  
  Booting from a virtual CD over IPMI:
  0ms latency, before: 27 seconds, after: 7 seconds
  60ms latency, before: over 12 minutes, after: under 5 minutes.
  
  Submitted by:	Toomas Soome <tsoome at me.com>
  Reviewed by:	delphij (previous version), emaste (previous version)
  Relnotes:	yes
  Differential Revision:	https://reviews.freebsd.org/D4713

Modified:
  head/lib/libstand/cd9660.c
  head/lib/libstand/dosfs.c
  head/lib/libstand/dosfs.h
  head/lib/libstand/ext2fs.c
  head/lib/libstand/read.c
  head/lib/libstand/stand.h
  head/lib/libstand/ufs.c
  head/lib/libstand/write.c
  head/sys/boot/common/bcache.c
  head/sys/boot/common/bootstrap.h
  head/sys/boot/common/disk.c
  head/sys/boot/common/md.c
  head/sys/boot/common/module.c
  head/sys/boot/efi/libefi/efipart.c
  head/sys/boot/efi/libefi/libefi.c
  head/sys/boot/efi/loader/main.c
  head/sys/boot/i386/libfirewire/firewire.c
  head/sys/boot/i386/libi386/bioscd.c
  head/sys/boot/i386/libi386/biosdisk.c
  head/sys/boot/i386/libi386/biosmem.c
  head/sys/boot/i386/libi386/pxe.c
  head/sys/boot/i386/loader/main.c
  head/sys/boot/mips/beri/loader/beri_disk_cfi.c
  head/sys/boot/mips/beri/loader/beri_disk_sdcard.c
  head/sys/boot/ofw/libofw/ofw_disk.c
  head/sys/boot/pc98/libpc98/bioscd.c
  head/sys/boot/pc98/libpc98/biosdisk.c
  head/sys/boot/pc98/libpc98/biosmem.c
  head/sys/boot/pc98/loader/main.c
  head/sys/boot/powerpc/kboot/hostdisk.c
  head/sys/boot/powerpc/ps3/ps3cdrom.c
  head/sys/boot/powerpc/ps3/ps3disk.c
  head/sys/boot/uboot/lib/disk.c
  head/sys/boot/usb/storage/umass_loader.c
  head/sys/boot/userboot/userboot/host.c
  head/sys/boot/userboot/userboot/main.c
  head/sys/boot/userboot/userboot/userboot_disk.c
  head/sys/boot/zfs/zfs.c

Modified: head/lib/libstand/cd9660.c
==============================================================================
--- head/lib/libstand/cd9660.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/cd9660.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -143,7 +143,7 @@ susp_lookup_record(struct open_file *f, 
 		if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) {
 			shc = (ISO_RRIP_CONT *)sh;
 			error = f->f_dev->dv_strategy(f->f_devdata, F_READ,
-			    cdb2devb(isonum_733(shc->location)),
+			    cdb2devb(isonum_733(shc->location)), 0,
 			    ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read);
 
 			/* Bail if it fails. */
@@ -288,7 +288,7 @@ cd9660_open(const char *path, struct ope
 	for (bno = 16;; bno++) {
 		twiddle(1);
 		rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno),
-					   ISO_DEFAULT_BLOCK_SIZE, buf, &read);
+					0, ISO_DEFAULT_BLOCK_SIZE, buf, &read);
 		if (rc)
 			goto out;
 		if (read != ISO_DEFAULT_BLOCK_SIZE) {
@@ -322,7 +322,7 @@ cd9660_open(const char *path, struct ope
 				twiddle(1);
 				rc = f->f_dev->dv_strategy
 					(f->f_devdata, F_READ,
-					 cdb2devb(bno + boff),
+					 cdb2devb(bno + boff), 0,
 					 ISO_DEFAULT_BLOCK_SIZE,
 					 buf, &read);
 				if (rc)
@@ -381,7 +381,7 @@ cd9660_open(const char *path, struct ope
 		bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length);
 		twiddle(1);
 		rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno),
-		    ISO_DEFAULT_BLOCK_SIZE, buf, &read);
+		    0, ISO_DEFAULT_BLOCK_SIZE, buf, &read);
 		if (rc)
 			goto out;
 		if (read != ISO_DEFAULT_BLOCK_SIZE) {
@@ -438,7 +438,8 @@ buf_read_file(struct open_file *f, char 
 
 		twiddle(16);
 		rc = f->f_dev->dv_strategy(f->f_devdata, F_READ,
-		    cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf, &read);
+		    cdb2devb(blkno), 0, ISO_DEFAULT_BLOCK_SIZE,
+		    fp->f_buf, &read);
 		if (rc)
 			return (rc);
 		if (read != ISO_DEFAULT_BLOCK_SIZE)

Modified: head/lib/libstand/dosfs.c
==============================================================================
--- head/lib/libstand/dosfs.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/dosfs.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -131,7 +131,18 @@ static DOS_DE dot[2] = {
 #define stclus(sz, de)  ((sz) != 32 ? cv2((de)->clus) :          \
                          ((u_int)cv2((de)->dex.h_clus) << 16) |  \
 			 cv2((de)->clus))
-    
+
+/*
+ * fat cache metadata
+ */
+struct fatcache {
+	int unit;	/* disk unit number */
+	int size;	/* buffer (and fat) size in sectors */
+	u_char *buf;
+};
+
+static struct fatcache fat;
+
 static int dosunmount(DOS_FS *);
 static int parsebs(DOS_FS *, DOS_BS *);
 static int namede(DOS_FS *, const char *, DOS_DE **);
@@ -143,8 +154,36 @@ static int fatcnt(DOS_FS *, u_int);
 static int fatget(DOS_FS *, u_int *);
 static int fatend(u_int, u_int);
 static int ioread(DOS_FS *, u_int, void *, u_int);
-static int iobuf(DOS_FS *, u_int);
-static int ioget(struct open_file *, u_int, void *, u_int);
+static int ioget(struct open_file *, daddr_t, size_t, void *, u_int);
+
+static void
+dos_read_fat(DOS_FS *fs, struct open_file *fd)
+{
+    struct devdesc *dd = fd->f_devdata;
+
+    if (fat.buf != NULL) {		/* can we reuse old buffer? */
+	if (fat.size != fs->spf) {
+	    free(fat.buf);		/* no, free old buffer */
+	    fat.buf = NULL;
+	}
+    }
+
+    if (fat.buf == NULL)
+	fat.buf = malloc(secbyt(fs->spf));
+
+    if (fat.buf != NULL) {
+	if (ioget(fd, fs->lsnfat, 0, fat.buf, secbyt(fs->spf)) == 0) {
+	    fat.size = fs->spf;
+	    fat.unit = dd->d_unit;
+	    return;
+	}
+    }
+    if (fat.buf != NULL)	/* got IO error */
+	free(fat.buf);
+    fat.buf = NULL;
+    fat.unit = -1;	/* impossible unit */
+    fat.size = 0;
+}
 
 /*
  * Mount DOS filesystem
@@ -153,15 +192,25 @@ static int
 dos_mount(DOS_FS *fs, struct open_file *fd)
 {
     int err;
+    struct devdesc *dd = fd->f_devdata;
+    u_char *buf;
 
     bzero(fs, sizeof(DOS_FS));
     fs->fd = fd;
-    if ((err = !(fs->buf = malloc(SECSIZ)) ? errno : 0) ||
-        (err = ioget(fs->fd, 0, fs->buf, 1)) ||
-        (err = parsebs(fs, (DOS_BS *)fs->buf))) {
+
+    if ((err = !(buf = malloc(secbyt(1))) ? errno : 0) ||
+        (err = ioget(fs->fd, 0, 0, buf, secbyt(1))) ||
+        (err = parsebs(fs, (DOS_BS *)buf))) {
+	if (buf != NULL)
+	    free(buf);
         (void)dosunmount(fs);
         return(err);
     }
+    free(buf);
+
+    if (fat.buf == NULL || fat.unit != dd->d_unit)
+	dos_read_fat(fs, fd);
+
     fs->root = dot[0];
     fs->root.name[0] = ' ';
     if (fs->fatsz == 32) {
@@ -194,8 +243,6 @@ dos_unmount(DOS_FS *fs)
 static int
 dosunmount(DOS_FS *fs)
 {
-    if (fs->buf)
-        free(fs->buf);
     free(fs);
     return(0);
 }
@@ -252,42 +299,47 @@ dos_read(struct open_file *fd, void *buf
     DOS_FILE *f = (DOS_FILE *)fd->f_fsdata;
     int err = 0;
 
+    /*
+     * as ioget() can be called *a lot*, use twiddle here.
+     * also 4 seems to be good value not to slow loading down too much:
+     * with 270MB file (~540k ioget() calls, twiddle can easily waste 4-5sec.
+     */
+    twiddle(4);
     nb = (u_int)nbyte;
     if ((size = fsize(f->fs, &f->de)) == -1)
 	return EINVAL;
     if (nb > (n = size - f->offset))
-        nb = n;
+	nb = n;
     off = f->offset;
     if ((clus = stclus(f->fs->fatsz, &f->de)))
-        off &= f->fs->bsize - 1;
+	off &= f->fs->bsize - 1;
     c = f->c;
     cnt = nb;
     while (cnt) {
-        n = 0;
-        if (!c) {
-            if ((c = clus))
-                n = bytblk(f->fs, f->offset);
-        } else if (!off)
-            n++;
-        while (n--) {
-            if ((err = fatget(f->fs, &c)))
+	n = 0;
+	if (!c) {
+	    if ((c = clus))
+		n = bytblk(f->fs, f->offset);
+	} else if (!off)
+	    n++;
+	while (n--) {
+	    if ((err = fatget(f->fs, &c)))
 		goto out;
-            if (!okclus(f->fs, c)) {
+	    if (!okclus(f->fs, c)) {
 		err = EINVAL;
 		goto out;
 	    }
-        }
-        if (!clus || (n = f->fs->bsize - off) > cnt)
-            n = cnt;
-        if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
-				      secbyt(f->fs->lsndir)) + off,
-			  buf, n)))
+	}
+	if (!clus || (n = f->fs->bsize - off) > cnt)
+	    n = cnt;
+	if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
+				      secbyt(f->fs->lsndir)) + off, buf, n)))
 	    goto out;
-        f->offset += n;
-        f->c = c;
-        off = 0;
-        buf = (char *)buf + n;
-        cnt -= n;
+	f->offset += n;
+	f->c = c;
+	off = 0;
+	buf = (char *)buf + n;
+	cnt -= n;
     }
  out:
     if (resid)
@@ -364,6 +416,23 @@ dos_stat(struct open_file *fd, struct st
 }
 
 static int
+dos_checksum(char *name, char *ext)
+{
+    int x, i;
+    char buf[11];
+
+    bcopy(name, buf, 8);
+    bcopy(ext, buf+8, 3);
+    x = 0;
+    for (i = 0; i < 11; i++) {
+	x = ((x & 1) << 7) | (x >> 1);
+	x += buf[i];
+	x &= 0xff;
+    }
+    return (x);
+}
+
+static int
 dos_readdir(struct open_file *fd, struct dirent *d)
 {
     /* DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; */
@@ -417,12 +486,7 @@ dos_readdir(struct open_file *fd, struct
 	    }
 	} else {
 	    if (xdn == 1) {
-		x = 0;
-		for (i = 0; i < 11; i++) {
-		    x = ((x & 1) << 7) | (x >> 1);
-		    x += dd.de.name[i];
-		    x &= 0xff;
-		}
+		x = dos_checksum(dd.de.name, dd.de.ext);
 		if (x == chk)
 		    break;
 	    } else {
@@ -555,7 +619,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
         else
             return EINVAL;
         for (sec = 0; sec < nsec; sec++) {
-            if ((err = ioget(fs->fd, lsec + sec, dir, 1)))
+            if ((err = ioget(fs->fd, lsec + sec, 0, dir, secbyt(1))))
                 return err;
             for (ent = 0; ent < DEPSEC; ent++) {
                 if (!*dir[ent].de.name)
@@ -577,9 +641,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
                         }
                     } else if (!(dir[ent].de.attr & FA_LABEL)) {
                         if ((ok = xdn == 1)) {
-                            for (x = 0, i = 0; i < 11; i++)
-                                x = ((((x & 1) << 7) | (x >> 1)) +
-                                     dir[ent].de.name[i]) & 0xff;
+			    x = dos_checksum(dir[ent].de.name, dir[ent].de.ext);
                             ok = chk == x &&
                                 !strcasecmp(name, (const char *)lfn);
                         }
@@ -699,22 +761,52 @@ fatcnt(DOS_FS *fs, u_int c)
 }
 
 /*
- * Get next cluster in cluster chain
+ * Get next cluster in cluster chain. Use in core fat cache unless another
+ * device replaced it.
  */
 static int
 fatget(DOS_FS *fs, u_int *c)
 {
     u_char buf[4];
-    u_int x;
-    int err;
+    u_char *s;
+    u_int x, offset, off, n, nbyte, lsec;
+    struct devdesc *dd = fs->fd->f_devdata;
+    int err = 0;
+
+    if (fat.unit != dd->d_unit) {
+	/* fat cache was changed to another device, dont use it */
+	err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
+	    fs->fatsz != 32 ? 2 : 4);
+	if (err)
+	    return err;
+    } else {
+	offset = fatoff(fs->fatsz, *c);
+	nbyte = fs->fatsz != 32 ? 2 : 4;
+
+	s = buf;
+	if ((off = offset & (SECSIZ - 1))) {
+	    offset -= off;
+	    lsec = bytsec(offset);
+	    offset += SECSIZ;
+	    if ((n = SECSIZ - off) > nbyte)
+		n = nbyte;
+	    memcpy(s, fat.buf + secbyt(lsec) + off, n);
+	    s += n;
+	    nbyte -= n;
+	}
+	n = nbyte & (SECSIZ - 1);
+	if (nbyte -= n) {
+	    memcpy(s, fat.buf + secbyt(bytsec(offset)), nbyte);
+	    offset += nbyte;
+	    s += nbyte;
+	}
+	if (n)
+	    memcpy(s, fat.buf + secbyt(bytsec(offset)), n);
+    }
 
-    err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
-                 fs->fatsz != 32 ? 2 : 4);
-    if (err)
-        return err;
     x = fs->fatsz != 32 ? cv2(buf) : cv4(buf);
     *c = fs->fatsz == 12 ? *c & 1 ? x >> 4 : x & 0xfff : x;
-    return 0;
+    return (0);
 }
 
 /*
@@ -739,42 +831,24 @@ ioread(DOS_FS *fs, u_int offset, void *b
     s = buf;
     if ((off = offset & (SECSIZ - 1))) {
         offset -= off;
-        if ((err = iobuf(fs, bytsec(offset))))
-            return err;
-        offset += SECSIZ;
         if ((n = SECSIZ - off) > nbyte)
             n = nbyte;
-        memcpy(s, fs->buf + off, n);
+        if ((err = ioget(fs->fd, bytsec(offset), off, s, n)))
+            return err;
+        offset += SECSIZ;
         s += n;
         nbyte -= n;
     }
     n = nbyte & (SECSIZ - 1);
     if (nbyte -= n) {
-        if ((err = ioget(fs->fd, bytsec(offset), s, bytsec(nbyte))))
+        if ((err = ioget(fs->fd, bytsec(offset), 0, s, nbyte)))
             return err;
         offset += nbyte;
         s += nbyte;
     }
     if (n) {
-        if ((err = iobuf(fs, bytsec(offset))))
-            return err;
-        memcpy(s, fs->buf, n);
-    }
-    return 0;
-}
-
-/*
- * Buffered sector-based I/O primitive
- */
-static int
-iobuf(DOS_FS *fs, u_int lsec)
-{
-    int err;
-
-    if (fs->bufsec != lsec) {
-        if ((err = ioget(fs->fd, lsec, fs->buf, 1)))
+        if ((err = ioget(fs->fd, bytsec(offset), 0, s, n)))
             return err;
-        fs->bufsec = lsec;
     }
     return 0;
 }
@@ -783,13 +857,8 @@ iobuf(DOS_FS *fs, u_int lsec)
  * Sector-based I/O primitive
  */
 static int
-ioget(struct open_file *fd, u_int lsec, void *buf, u_int nsec)
+ioget(struct open_file *fd, daddr_t lsec, size_t offset, void *buf, u_int size)
 {
-    int	err;
-
-    twiddle(1);
-    if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, 
-					secbyt(nsec), buf, NULL)))
-	return(err);
-    return(0);
+    return ((fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, offset,
+	size, buf, NULL));
 }

Modified: head/lib/libstand/dosfs.h
==============================================================================
--- head/lib/libstand/dosfs.h	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/dosfs.h	Mon Apr 18 23:09:22 2016	(r298230)
@@ -96,8 +96,6 @@ typedef union {
 
 typedef struct {
     struct open_file *fd;       /* file descriptor */
-    u_char *buf;                /* buffer */
-    u_int bufsec;               /* buffered sector */
     u_int links;                /* active links to structure */
     u_int spc;                  /* sectors per cluster */
     u_int bsize;                /* cluster size in bytes */

Modified: head/lib/libstand/ext2fs.c
==============================================================================
--- head/lib/libstand/ext2fs.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/ext2fs.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -355,7 +355,7 @@ ext2fs_open(const char *upath, struct op
 	fp->f_fs = fs;
 	twiddle(1);
 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-	    EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size);
+	    EXT2_SBLOCK, 0, EXT2_SBSIZE, (char *)fs, &buf_size);
 	if (error)
 		goto out;
 
@@ -397,7 +397,7 @@ ext2fs_open(const char *upath, struct op
 	fp->f_bg = malloc(len);
 	twiddle(1);
 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-	    EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len,
+	    EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, 0, len,
 	    (char *)fp->f_bg, &buf_size);
 	if (error)
 		goto out;
@@ -509,7 +509,7 @@ ext2fs_open(const char *upath, struct op
 				
 				twiddle(1);
 				error = (f->f_dev->dv_strategy)(f->f_devdata,
-				    F_READ, fsb_to_db(fs, disk_block),
+				    F_READ, fsb_to_db(fs, disk_block), 0,
 				    fs->fs_bsize, buf, &buf_size);
 				if (error)
 					goto out;
@@ -570,7 +570,7 @@ read_inode(ino_t inumber, struct open_fi
 	buf = malloc(fs->fs_bsize);
 	twiddle(1);
 	error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-	    ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize);
+	    ino_to_db(fs, fp->f_bg, inumber), 0, fs->fs_bsize, buf, &rsize);
 	if (error)
 		goto out;
 	if (rsize != fs->fs_bsize) {
@@ -667,7 +667,7 @@ block_map(struct open_file *f, daddr_t f
 					malloc(fs->fs_bsize);
 			twiddle(1);
 			error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-			    fsb_to_db(fp->f_fs, ind_block_num), fs->fs_bsize,
+			    fsb_to_db(fp->f_fs, ind_block_num), 0, fs->fs_bsize,
 			    fp->f_blk[level], &fp->f_blksize[level]);
 			if (error)
 				return (error);
@@ -725,7 +725,7 @@ buf_read_file(struct open_file *f, char 
 		} else {
 			twiddle(4);
 			error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-			    fsb_to_db(fs, disk_block), block_size,
+			    fsb_to_db(fs, disk_block), 0, block_size,
 			    fp->f_buf, &fp->f_buf_size);
 			if (error)
 				goto done;

Modified: head/lib/libstand/read.c
==============================================================================
--- head/lib/libstand/read.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/read.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -79,7 +79,7 @@ read(int fd, void *dest, size_t bcount)
     if (f->f_flags & F_RAW) {
 	twiddle(4);
 	errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-					btodb(f->f_offset), bcount, dest, &resid);
+				btodb(f->f_offset), 0, bcount, dest, &resid);
 	if (errno)
 	    return (-1);
 	f->f_offset += resid;

Modified: head/lib/libstand/stand.h
==============================================================================
--- head/lib/libstand/stand.h	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/stand.h	Mon Apr 18 23:09:22 2016	(r298230)
@@ -138,8 +138,8 @@ struct devsw {
     const char	dv_name[8];
     int		dv_type;		/* opaque type constant, arch-dependant */
     int		(*dv_init)(void);	/* early probe call */
-    int		(*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size,
-			       char *buf, size_t *rsize);
+    int		(*dv_strategy)(void *devdata, int rw, daddr_t blk,
+			size_t offset, size_t size, char *buf, size_t *rsize);
     int		(*dv_open)(struct open_file *f, ...);
     int		(*dv_close)(struct open_file *f);
     int		(*dv_ioctl)(struct open_file *f, u_long cmd, void *data);
@@ -154,6 +154,24 @@ extern struct devsw netdev;
 
 extern int errno;
 
+/*
+ * Generic device specifier; architecture-dependent
+ * versions may be larger, but should be allowed to
+ * overlap.
+ */
+struct devdesc
+{
+    struct devsw	*d_dev;
+    int			d_type;
+#define DEVT_NONE	0
+#define DEVT_DISK	1
+#define DEVT_NET	2
+#define DEVT_CD		3
+#define DEVT_ZFS	4
+    int			d_unit;
+    void		*d_opendata;
+};
+
 struct open_file {
     int			f_flags;	/* see F_* below */
     struct devsw	*f_dev;		/* pointer to device operations */

Modified: head/lib/libstand/ufs.c
==============================================================================
--- head/lib/libstand/ufs.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/ufs.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -157,7 +157,7 @@ read_inode(inumber, f)
 	buf = malloc(fs->fs_bsize);
 	twiddle(1);
 	rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-		fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize,
+		fsbtodb(fs, ino_to_fsba(fs, inumber)), 0, fs->fs_bsize,
 		buf, &rsize);
 	if (rc)
 		goto out;
@@ -267,7 +267,7 @@ block_map(f, file_block, disk_block_p)
 					malloc(fs->fs_bsize);
 			twiddle(1);
 			rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-				fsbtodb(fp->f_fs, ind_block_num),
+				fsbtodb(fp->f_fs, ind_block_num), 0,
 				fs->fs_bsize,
 				fp->f_blk[level],
 				&fp->f_blksize[level]);
@@ -348,7 +348,7 @@ buf_write_file(f, buf_p, size_p)
 
 		twiddle(4);
 		rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-			fsbtodb(fs, disk_block),
+			fsbtodb(fs, disk_block), 0,
 			block_size, fp->f_buf, &fp->f_buf_size);
 		if (rc)
 			return (rc);
@@ -367,7 +367,7 @@ buf_write_file(f, buf_p, size_p)
 
 	twiddle(4);
 	rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
-		fsbtodb(fs, disk_block),
+		fsbtodb(fs, disk_block), 0,
 		block_size, fp->f_buf, &fp->f_buf_size);
 	return (rc);
 }
@@ -408,7 +408,7 @@ buf_read_file(f, buf_p, size_p)
 		} else {
 			twiddle(4);
 			rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-				fsbtodb(fs, disk_block),
+				fsbtodb(fs, disk_block), 0,
 				block_size, fp->f_buf, &fp->f_buf_size);
 			if (rc)
 				return (rc);
@@ -521,7 +521,7 @@ ufs_open(upath, f)
 	 */
 	for (i = 0; sblock_try[i] != -1; i++) {
 		rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
-		    sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
+		    sblock_try[i] / DEV_BSIZE, 0, SBLOCKSIZE,
 		    (char *)fs, &buf_size);
 		if (rc)
 			goto out;
@@ -651,7 +651,7 @@ ufs_open(upath, f)
 				
 				twiddle(1);
 				rc = (f->f_dev->dv_strategy)(f->f_devdata,
-					F_READ, fsbtodb(fs, disk_block),
+					F_READ, fsbtodb(fs, disk_block), 0,
 					fs->fs_bsize, buf, &buf_size);
 				if (rc)
 					goto out;

Modified: head/lib/libstand/write.c
==============================================================================
--- head/lib/libstand/write.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/lib/libstand/write.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -82,7 +82,7 @@ write(fd, dest, bcount)
 	if (f->f_flags & F_RAW) {
 		twiddle(4);
 		errno = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
-			btodb(f->f_offset), bcount, dest, &resid);
+			btodb(f->f_offset), 0, bcount, dest, &resid);
 		if (errno)
 			return (-1);
 		f->f_offset += resid;

Modified: head/sys/boot/common/bcache.c
==============================================================================
--- head/sys/boot/common/bcache.c	Mon Apr 18 22:00:26 2016	(r298229)
+++ head/sys/boot/common/bcache.c	Mon Apr 18 23:09:22 2016	(r298230)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith at freebsd.org>
+ * Copyright 2015 Toomas Soome <tsoome at me.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,99 +26,155 @@
  */
 
 #include <sys/cdefs.h>
+#include <sys/param.h>
 __FBSDID("$FreeBSD$");
 
 /*
- * Simple LRU block cache
+ * Simple hashed block cache
  */
 
 #include <sys/stdint.h>
 
 #include <stand.h>
 #include <string.h>
-#include <bitstring.h>
+#include <strings.h>
 
 #include "bootstrap.h"
 
 /* #define BCACHE_DEBUG */
 
 #ifdef BCACHE_DEBUG
-#define BCACHE_TIMEOUT	10
 # define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
 #else
-#define BCACHE_TIMEOUT	2
 # define DEBUG(fmt, args...)
 #endif
 
-
 struct bcachectl
 {
     daddr_t	bc_blkno;
-    time_t	bc_stamp;
     int		bc_count;
 };
 
-static struct bcachectl	*bcache_ctl;
-static caddr_t		bcache_data;
-static bitstr_t		*bcache_miss;
-static u_int		bcache_nblks;
-static u_int		bcache_blksize;
-static u_int		bcache_hits, bcache_misses, bcache_ops, bcache_bypasses;
-static u_int		bcache_flushes;
-static u_int		bcache_bcount;
-
-static void	bcache_invalidate(daddr_t blkno);
-static void	bcache_insert(caddr_t buf, daddr_t blkno);
-static int	bcache_lookup(caddr_t buf, daddr_t blkno);
+/*
+ * bcache per device node. cache is allocated on device first open and freed
+ * on last close, to save memory. The issue there is the size; biosdisk
+ * supports up to 31 (0x1f) devices. Classic setup would use single disk
+ * to boot from, but this has changed with zfs.
+ */
+struct bcache {
+    struct bcachectl	*bcache_ctl;
+    caddr_t		bcache_data;
+    u_int		bcache_nblks;
+    size_t		ra;
+};
+
+static u_int bcache_total_nblks;	/* set by bcache_init */
+static u_int bcache_blksize;		/* set by bcache_init */
+static u_int bcache_numdev;		/* set by bcache_add_dev */
+/* statistics */
+static u_int bcache_units;	/* number of devices with cache */
+static u_int bcache_unit_nblks;	/* nblocks per unit */
+static u_int bcache_hits;
+static u_int bcache_misses;
+static u_int bcache_ops;
+static u_int bcache_bypasses;
+static u_int bcache_bcount;
+static u_int bcache_rablks;
+
+#define	BHASH(bc, blkno)	((blkno) & ((bc)->bcache_nblks - 1))
+#define	BCACHE_LOOKUP(bc, blkno)	\
+	((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
+#define	BCACHE_READAHEAD	256
+#define	BCACHE_MINREADAHEAD	32
+
+static void	bcache_invalidate(struct bcache *bc, daddr_t blkno);
+static void	bcache_insert(struct bcache *bc, daddr_t blkno);
+static void	bcache_free_instance(struct bcache *bc);
 
 /*
  * Initialise the cache for (nblks) of (bsize).
  */
-int
+void
 bcache_init(u_int nblks, size_t bsize)
 {
-    /* discard any old contents */
-    if (bcache_data != NULL) {
-	free(bcache_data);
-	bcache_data = NULL;
-	free(bcache_ctl);
-    }
-
-    /* Allocate control structures */
-    bcache_nblks = nblks;
+    /* set up control data */
+    bcache_total_nblks = nblks;
     bcache_blksize = bsize;
-    bcache_data = malloc(bcache_nblks * bcache_blksize);
-    bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct bcachectl));
-    bcache_miss = bit_alloc((bcache_nblks + 1) / 2);
-    if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss == NULL)) {
-	if (bcache_miss)
-	    free(bcache_miss);
-	if (bcache_ctl)
-	    free(bcache_ctl);
-	if (bcache_data)
-	    free(bcache_data);
-	bcache_data = NULL;
-	return(ENOMEM);
-    }
-
-    return(0);
 }
 
 /*
- * Flush the cache
+ * add number of devices to bcache. we have to divide cache space
+ * between the devices, so bcache_add_dev() can be used to set up the
+ * number. The issue is, we need to get the number before actual allocations.
+ * bcache_add_dev() is supposed to be called from device init() call, so the
+ * assumption is, devsw dv_init is called for plain devices first, and
+ * for zfs, last.
  */
 void
-bcache_flush(void)
+bcache_add_dev(int devices)
 {
-    u_int	i;
+    bcache_numdev += devices;
+}
 
-    bcache_flushes++;
+void *
+bcache_allocate(void)
+{
+    u_int i;
+    struct bcache *bc = malloc(sizeof (struct bcache));
+    int disks = bcache_numdev;
+
+    if (disks == 0)
+	disks = 1;	/* safe guard */
+
+    if (bc == NULL) {
+	errno = ENOMEM;
+	return (bc);
+    }
+
+    /*
+     * the bcache block count must be power of 2 for hash function
+     */
+    i = fls(disks) - 1;		/* highbit - 1 */
+    if (disks > (1 << i))	/* next power of 2 */
+	i++;
+
+    bc->bcache_nblks = bcache_total_nblks >> i;
+    bcache_unit_nblks = bc->bcache_nblks;
+    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+    if (bc->bcache_data == NULL) {
+	/* dont error out yet. fall back to 32 blocks and try again */
+	bc->bcache_nblks = 32;
+	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+    }
+
+    bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
+
+    if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
+	bcache_free_instance(bc);
+	errno = ENOMEM;
+	return(NULL);
+    }
 
     /* Flush the cache */
-    for (i = 0; i < bcache_nblks; i++) {
-	bcache_ctl[i].bc_count = -1;
-	bcache_ctl[i].bc_blkno = -1;
-    }
+    for (i = 0; i < bc->bcache_nblks; i++) {
+	bc->bcache_ctl[i].bc_count = -1;
+	bc->bcache_ctl[i].bc_blkno = -1;
+    }
+    bcache_units++;
+    bc->ra = BCACHE_READAHEAD;	/* optimistic read ahead */
+    return (bc);
+}
+
+void
+bcache_free(void *cache)
+{
+    struct bcache *bc = cache;
+
+    if (bc == NULL)
+	return;
+
+    bcache_free_instance(bc);
+    bcache_units--;
 }
 
 /*
@@ -125,31 +182,22 @@ bcache_flush(void)
  * cache with the new values.
  */
 static int
-write_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
-		char *buf, size_t *rsize)
+write_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
+    size_t size, char *buf, size_t *rsize)
 {
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
+    struct bcache		*bc = dd->dv_cache;
     daddr_t			i, nblk;
-    int				err;
 
     nblk = size / bcache_blksize;
 
     /* Invalidate the blocks being written */
     for (i = 0; i < nblk; i++) {
-	bcache_invalidate(blk + i);
+	bcache_invalidate(bc, blk + i);
     }
 
     /* Write the blocks */
-    err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize);
-
-    /* Populate the block cache with the new data */
-    if (err == 0) {
-    	for (i = 0; i < nblk; i++) {
-	    bcache_insert(buf + (i * bcache_blksize),blk + i);
-    	}
-    }
-
-    return err;
+    return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize));
 }
 
 /*
@@ -158,61 +206,87 @@ write_strategy(void *devdata, int unit, 
  * device I/O and then use the I/O results to populate the cache. 
  */
 static int
-read_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
-		char *buf, size_t *rsize)
+read_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
+    size_t size, char *buf, size_t *rsize)
 {
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
-    int				p_size, result;
-    daddr_t			p_blk, i, j, nblk;
+    struct bcache		*bc = dd->dv_cache;
+    size_t			i, nblk, p_size, r_size, complete, ra;
+    int				result;
+    daddr_t			p_blk;
     caddr_t			p_buf;
 
+    if (bc == NULL) {
+	errno = ENODEV;
+	return (-1);
+    }
+
+    if (rsize != NULL)
+	*rsize = 0;
+
     nblk = size / bcache_blksize;
+    if ((nblk == 0 && size != 0) || offset != 0)
+	nblk++;
     result = 0;
+    complete = 1;
 
-    /* Satisfy any cache hits up front */
+    /* Satisfy any cache hits up front, break on first miss */
     for (i = 0; i < nblk; i++) {
-	if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) {
-	    bit_set(bcache_miss, i);	/* cache miss */
-	    bcache_misses++;
+	if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
+	    bcache_misses += (nblk - i);
+	    complete = 0;
+	    if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
+		bc->ra >>= 1;	/* reduce read ahead */
+	    break;
 	} else {
-	    bit_clear(bcache_miss, i);	/* cache hit */
 	    bcache_hits++;
 	}
     }
 
-    /* Go back and fill in any misses  XXX optimise */
-    p_blk = -1;
-    p_buf = NULL;
-    p_size = 0;
-    for (i = 0; i < nblk; i++) {
-	if (bit_test(bcache_miss, i)) {
-	    /* miss, add to pending transfer */
-	    if (p_blk == -1) {
-		p_blk = blk + i;
-		p_buf = buf + (bcache_blksize * i);
-		p_size = 1;
-	    } else {
-		p_size++;
-	    }
-	} else if (p_blk != -1) {
-	    /* hit, complete pending transfer */
-	    result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
-	    if (result != 0)
-		goto done;
-	    for (j = 0; j < p_size; j++)
-		bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
-	    p_blk = -1;
-	}
-    }
-    if (p_blk != -1) {
-	/* pending transfer left */
-	result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
-	if (result != 0)
-	    goto done;
-	for (j = 0; j < p_size; j++)
-	    bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
-    }
-    
+   if (complete) {	/* whole set was in cache, return it */
+	if (bc->ra < BCACHE_READAHEAD)
+		bc->ra <<= 1;	/* increase read ahead */
+	bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset,
+	    buf, size);
+	goto done;
+   }
+
+    /*
+     * Fill in any misses. From check we have i pointing to first missing
+     * block, read in all remaining blocks + readahead.
+     * We have space at least for nblk - i before bcache wraps.
+     */
+    p_blk = blk + i;
+    p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
+    r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
+
+    p_size = MIN(r_size, nblk - i);	/* read at least those blocks */
+
+    ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
+    if (ra != bc->bcache_nblks) { /* do we have RA space? */
+	ra = MIN(bc->ra, ra);
+	p_size += ra;
+    }
+
+    /* invalidate bcache */
+    for (i = 0; i < p_size; i++) {
+	bcache_invalidate(bc, p_blk + i);
+    }
+    r_size = 0;
+    result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0,
+	p_size * bcache_blksize, p_buf, &r_size);
+
+    if (result)
+	goto done;
+
+    r_size /= bcache_blksize;
+    for (i = 0; i < r_size; i++)
+	bcache_insert(bc, p_blk + i);
+
+    bcache_rablks += ra;
+    bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf,
+	size);
+
  done:
     if ((result == 0) && (rsize != NULL))
 	*rsize = size;
@@ -220,130 +294,144 @@ read_strategy(void *devdata, int unit, i
 }
 
 /* 
- * Requests larger than 1/2 the cache size will be bypassed and go
+ * Requests larger than 1/2 cache size will be bypassed and go
  * directly to the disk.  XXX tune this.
  */
 int
-bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
-		char *buf, size_t *rsize)
+bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
+    size_t size, char *buf, size_t *rsize)
 {
-    static int			bcache_unit = -1;
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
+    struct bcache		*bc = dd->dv_cache;
+    u_int bcache_nblks = 0;
+    int nblk, cblk, ret;
+    size_t csize, isize, total;
 
     bcache_ops++;
 
-    if(bcache_unit != unit) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list