svn commit: r298230 - in head: lib/libstand sys/boot/common sys/boot/efi/libefi sys/boot/efi/loader sys/boot/i386/libfirewire sys/boot/i386/libi386 sys/boot/i386/loader sys/boot/mips/beri/loader sy...

Howard Su howard0su at gmail.com
Tue Apr 19 15:30:15 UTC 2016


This is a wonderful change. It also helps a lot when loading
many .ko files from a local disk. In my case, loading 5 different .ko files
was reduced from 5 seconds to 1 second. Great job!

-Howard

On Tue, Apr 19, 2016 at 7:09 AM Allan Jude <allanjude at freebsd.org> wrote:

> Author: allanjude
> Date: Mon Apr 18 23:09:22 2016
> New Revision: 298230
> URL: https://svnweb.freebsd.org/changeset/base/298230
>
> Log:
>   A new implementation of the loader block cache
>
>   The block cache implementation in loader has proven to be almost
> useless, and in worst case even slowing down the disk reads due to
> insufficient cache size and extra memory copy.
>   Also the current cache implementation does not cache reads from CDs, or
> work with zfs built on top of multiple disks.
>   Instead of an LRU, this code uses a simple hash (O(1) read from cache),
> and instead of a single global cache, a separate cache per block device.
>   The cache also implements limited read-ahead to increase performance.
>   To simplify read ahead management, the read ahead will not wrap over
> bcache end, so in worst case, single block physical read will be performed
> to fill the last block in bcache.
>
>   Booting from a virtual CD over IPMI:
>   0ms latency, before: 27 second, after: 7 seconds
>   60ms latency, before: over 12 minutes, after: under 5 minutes.
>
>   Submitted by: Toomas Soome <tsoome at me.com>
>   Reviewed by:  delphij (previous version), emaste (previous version)
>   Relnotes:     yes
>   Differential Revision:        https://reviews.freebsd.org/D4713
>
> Modified:
>   head/lib/libstand/cd9660.c
>   head/lib/libstand/dosfs.c
>   head/lib/libstand/dosfs.h
>   head/lib/libstand/ext2fs.c
>   head/lib/libstand/read.c
>   head/lib/libstand/stand.h
>   head/lib/libstand/ufs.c
>   head/lib/libstand/write.c
>   head/sys/boot/common/bcache.c
>   head/sys/boot/common/bootstrap.h
>   head/sys/boot/common/disk.c
>   head/sys/boot/common/md.c
>   head/sys/boot/common/module.c
>   head/sys/boot/efi/libefi/efipart.c
>   head/sys/boot/efi/libefi/libefi.c
>   head/sys/boot/efi/loader/main.c
>   head/sys/boot/i386/libfirewire/firewire.c
>   head/sys/boot/i386/libi386/bioscd.c
>   head/sys/boot/i386/libi386/biosdisk.c
>   head/sys/boot/i386/libi386/biosmem.c
>   head/sys/boot/i386/libi386/pxe.c
>   head/sys/boot/i386/loader/main.c
>   head/sys/boot/mips/beri/loader/beri_disk_cfi.c
>   head/sys/boot/mips/beri/loader/beri_disk_sdcard.c
>   head/sys/boot/ofw/libofw/ofw_disk.c
>   head/sys/boot/pc98/libpc98/bioscd.c
>   head/sys/boot/pc98/libpc98/biosdisk.c
>   head/sys/boot/pc98/libpc98/biosmem.c
>   head/sys/boot/pc98/loader/main.c
>   head/sys/boot/powerpc/kboot/hostdisk.c
>   head/sys/boot/powerpc/ps3/ps3cdrom.c
>   head/sys/boot/powerpc/ps3/ps3disk.c
>   head/sys/boot/uboot/lib/disk.c
>   head/sys/boot/usb/storage/umass_loader.c
>   head/sys/boot/userboot/userboot/host.c
>   head/sys/boot/userboot/userboot/main.c
>   head/sys/boot/userboot/userboot/userboot_disk.c
>   head/sys/boot/zfs/zfs.c
>
> Modified: head/lib/libstand/cd9660.c
>
> ==============================================================================
> --- head/lib/libstand/cd9660.c  Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/cd9660.c  Mon Apr 18 23:09:22 2016        (r298230)
> @@ -143,7 +143,7 @@ susp_lookup_record(struct open_file *f,
>                 if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) {
>                         shc = (ISO_RRIP_CONT *)sh;
>                         error = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> -                           cdb2devb(isonum_733(shc->location)),
> +                           cdb2devb(isonum_733(shc->location)), 0,
>                             ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read);
>
>                         /* Bail if it fails. */
> @@ -288,7 +288,7 @@ cd9660_open(const char *path, struct ope
>         for (bno = 16;; bno++) {
>                 twiddle(1);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> cdb2devb(bno),
> -                                          ISO_DEFAULT_BLOCK_SIZE, buf,
> &read);
> +                                       0, ISO_DEFAULT_BLOCK_SIZE, buf,
> &read);
>                 if (rc)
>                         goto out;
>                 if (read != ISO_DEFAULT_BLOCK_SIZE) {
> @@ -322,7 +322,7 @@ cd9660_open(const char *path, struct ope
>                                 twiddle(1);
>                                 rc = f->f_dev->dv_strategy
>                                         (f->f_devdata, F_READ,
> -                                        cdb2devb(bno + boff),
> +                                        cdb2devb(bno + boff), 0,
>                                          ISO_DEFAULT_BLOCK_SIZE,
>                                          buf, &read);
>                                 if (rc)
> @@ -381,7 +381,7 @@ cd9660_open(const char *path, struct ope
>                 bno = isonum_733(rec.extent) +
> isonum_711(rec.ext_attr_length);
>                 twiddle(1);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> cdb2devb(bno),
> -                   ISO_DEFAULT_BLOCK_SIZE, buf, &read);
> +                   0, ISO_DEFAULT_BLOCK_SIZE, buf, &read);
>                 if (rc)
>                         goto out;
>                 if (read != ISO_DEFAULT_BLOCK_SIZE) {
> @@ -438,7 +438,8 @@ buf_read_file(struct open_file *f, char
>
>                 twiddle(16);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> -                   cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf,
> &read);
> +                   cdb2devb(blkno), 0, ISO_DEFAULT_BLOCK_SIZE,
> +                   fp->f_buf, &read);
>                 if (rc)
>                         return (rc);
>                 if (read != ISO_DEFAULT_BLOCK_SIZE)
>
> Modified: head/lib/libstand/dosfs.c
>
> ==============================================================================
> --- head/lib/libstand/dosfs.c   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/dosfs.c   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -131,7 +131,18 @@ static DOS_DE dot[2] = {
>  #define stclus(sz, de)  ((sz) != 32 ? cv2((de)->clus) :          \
>                           ((u_int)cv2((de)->dex.h_clus) << 16) |  \
>                          cv2((de)->clus))
> -
> +
> +/*
> + * fat cache metadata
> + */
> +struct fatcache {
> +       int unit;       /* disk unit number */
> +       int size;       /* buffer (and fat) size in sectors */
> +       u_char *buf;
> +};
> +
> +static struct fatcache fat;
> +
>  static int dosunmount(DOS_FS *);
>  static int parsebs(DOS_FS *, DOS_BS *);
>  static int namede(DOS_FS *, const char *, DOS_DE **);
> @@ -143,8 +154,36 @@ static int fatcnt(DOS_FS *, u_int);
>  static int fatget(DOS_FS *, u_int *);
>  static int fatend(u_int, u_int);
>  static int ioread(DOS_FS *, u_int, void *, u_int);
> -static int iobuf(DOS_FS *, u_int);
> -static int ioget(struct open_file *, u_int, void *, u_int);
> +static int ioget(struct open_file *, daddr_t, size_t, void *, u_int);
> +
> +static void
> +dos_read_fat(DOS_FS *fs, struct open_file *fd)
> +{
> +    struct devdesc *dd = fd->f_devdata;
> +
> +    if (fat.buf != NULL) {             /* can we reuse old buffer? */
> +       if (fat.size != fs->spf) {
> +           free(fat.buf);              /* no, free old buffer */
> +           fat.buf = NULL;
> +       }
> +    }
> +
> +    if (fat.buf == NULL)
> +       fat.buf = malloc(secbyt(fs->spf));
> +
> +    if (fat.buf != NULL) {
> +       if (ioget(fd, fs->lsnfat, 0, fat.buf, secbyt(fs->spf)) == 0) {
> +           fat.size = fs->spf;
> +           fat.unit = dd->d_unit;
> +           return;
> +       }
> +    }
> +    if (fat.buf != NULL)       /* got IO error */
> +       free(fat.buf);
> +    fat.buf = NULL;
> +    fat.unit = -1;     /* impossible unit */
> +    fat.size = 0;
> +}
>
>  /*
>   * Mount DOS filesystem
> @@ -153,15 +192,25 @@ static int
>  dos_mount(DOS_FS *fs, struct open_file *fd)
>  {
>      int err;
> +    struct devdesc *dd = fd->f_devdata;
> +    u_char *buf;
>
>      bzero(fs, sizeof(DOS_FS));
>      fs->fd = fd;
> -    if ((err = !(fs->buf = malloc(SECSIZ)) ? errno : 0) ||
> -        (err = ioget(fs->fd, 0, fs->buf, 1)) ||
> -        (err = parsebs(fs, (DOS_BS *)fs->buf))) {
> +
> +    if ((err = !(buf = malloc(secbyt(1))) ? errno : 0) ||
> +        (err = ioget(fs->fd, 0, 0, buf, secbyt(1))) ||
> +        (err = parsebs(fs, (DOS_BS *)buf))) {
> +       if (buf != NULL)
> +           free(buf);
>          (void)dosunmount(fs);
>          return(err);
>      }
> +    free(buf);
> +
> +    if (fat.buf == NULL || fat.unit != dd->d_unit)
> +       dos_read_fat(fs, fd);
> +
>      fs->root = dot[0];
>      fs->root.name[0] = ' ';
>      if (fs->fatsz == 32) {
> @@ -194,8 +243,6 @@ dos_unmount(DOS_FS *fs)
>  static int
>  dosunmount(DOS_FS *fs)
>  {
> -    if (fs->buf)
> -        free(fs->buf);
>      free(fs);
>      return(0);
>  }
> @@ -252,42 +299,47 @@ dos_read(struct open_file *fd, void *buf
>      DOS_FILE *f = (DOS_FILE *)fd->f_fsdata;
>      int err = 0;
>
> +    /*
> +     * as ioget() can be called *a lot*, use twiddle here.
> +     * also 4 seems to be good value not to slow loading down too much:
> +     * with 270MB file (~540k ioget() calls, twiddle can easily waste
> 4-5sec.
> +     */
> +    twiddle(4);
>      nb = (u_int)nbyte;
>      if ((size = fsize(f->fs, &f->de)) == -1)
>         return EINVAL;
>      if (nb > (n = size - f->offset))
> -        nb = n;
> +       nb = n;
>      off = f->offset;
>      if ((clus = stclus(f->fs->fatsz, &f->de)))
> -        off &= f->fs->bsize - 1;
> +       off &= f->fs->bsize - 1;
>      c = f->c;
>      cnt = nb;
>      while (cnt) {
> -        n = 0;
> -        if (!c) {
> -            if ((c = clus))
> -                n = bytblk(f->fs, f->offset);
> -        } else if (!off)
> -            n++;
> -        while (n--) {
> -            if ((err = fatget(f->fs, &c)))
> +       n = 0;
> +       if (!c) {
> +           if ((c = clus))
> +               n = bytblk(f->fs, f->offset);
> +       } else if (!off)
> +           n++;
> +       while (n--) {
> +           if ((err = fatget(f->fs, &c)))
>                 goto out;
> -            if (!okclus(f->fs, c)) {
> +           if (!okclus(f->fs, c)) {
>                 err = EINVAL;
>                 goto out;
>             }
> -        }
> -        if (!clus || (n = f->fs->bsize - off) > cnt)
> -            n = cnt;
> -        if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
> -                                     secbyt(f->fs->lsndir)) + off,
> -                         buf, n)))
> +       }
> +       if (!clus || (n = f->fs->bsize - off) > cnt)
> +           n = cnt;
> +       if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
> +                                     secbyt(f->fs->lsndir)) + off, buf,
> n)))
>             goto out;
> -        f->offset += n;
> -        f->c = c;
> -        off = 0;
> -        buf = (char *)buf + n;
> -        cnt -= n;
> +       f->offset += n;
> +       f->c = c;
> +       off = 0;
> +       buf = (char *)buf + n;
> +       cnt -= n;
>      }
>   out:
>      if (resid)
> @@ -364,6 +416,23 @@ dos_stat(struct open_file *fd, struct st
>  }
>
>  static int
> +dos_checksum(char *name, char *ext)
> +{
> +    int x, i;
> +    char buf[11];
> +
> +    bcopy(name, buf, 8);
> +    bcopy(ext, buf+8, 3);
> +    x = 0;
> +    for (i = 0; i < 11; i++) {
> +       x = ((x & 1) << 7) | (x >> 1);
> +       x += buf[i];
> +       x &= 0xff;
> +    }
> +    return (x);
> +}
> +
> +static int
>  dos_readdir(struct open_file *fd, struct dirent *d)
>  {
>      /* DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; */
> @@ -417,12 +486,7 @@ dos_readdir(struct open_file *fd, struct
>             }
>         } else {
>             if (xdn == 1) {
> -               x = 0;
> -               for (i = 0; i < 11; i++) {
> -                   x = ((x & 1) << 7) | (x >> 1);
> -                   x += dd.de.name[i];
> -                   x &= 0xff;
> -               }
> +               x = dos_checksum(dd.de.name, dd.de.ext);
>                 if (x == chk)
>                     break;
>             } else {
> @@ -555,7 +619,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
>          else
>              return EINVAL;
>          for (sec = 0; sec < nsec; sec++) {
> -            if ((err = ioget(fs->fd, lsec + sec, dir, 1)))
> +            if ((err = ioget(fs->fd, lsec + sec, 0, dir, secbyt(1))))
>                  return err;
>              for (ent = 0; ent < DEPSEC; ent++) {
>                  if (!*dir[ent].de.name)
> @@ -577,9 +641,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
>                          }
>                      } else if (!(dir[ent].de.attr & FA_LABEL)) {
>                          if ((ok = xdn == 1)) {
> -                            for (x = 0, i = 0; i < 11; i++)
> -                                x = ((((x & 1) << 7) | (x >> 1)) +
> -                                     dir[ent].de.name[i]) & 0xff;
> +                           x = dos_checksum(dir[ent].de.name,
> dir[ent].de.ext);
>                              ok = chk == x &&
>                                  !strcasecmp(name, (const char *)lfn);
>                          }
> @@ -699,22 +761,52 @@ fatcnt(DOS_FS *fs, u_int c)
>  }
>
>  /*
> - * Get next cluster in cluster chain
> + * Get next cluster in cluster chain. Use in core fat cache unless another
> + * device replaced it.
>   */
>  static int
>  fatget(DOS_FS *fs, u_int *c)
>  {
>      u_char buf[4];
> -    u_int x;
> -    int err;
> +    u_char *s;
> +    u_int x, offset, off, n, nbyte, lsec;
> +    struct devdesc *dd = fs->fd->f_devdata;
> +    int err = 0;
> +
> +    if (fat.unit != dd->d_unit) {
> +       /* fat cache was changed to another device, dont use it */
> +       err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
> +           fs->fatsz != 32 ? 2 : 4);
> +       if (err)
> +           return err;
> +    } else {
> +       offset = fatoff(fs->fatsz, *c);
> +       nbyte = fs->fatsz != 32 ? 2 : 4;
> +
> +       s = buf;
> +       if ((off = offset & (SECSIZ - 1))) {
> +           offset -= off;
> +           lsec = bytsec(offset);
> +           offset += SECSIZ;
> +           if ((n = SECSIZ - off) > nbyte)
> +               n = nbyte;
> +           memcpy(s, fat.buf + secbyt(lsec) + off, n);
> +           s += n;
> +           nbyte -= n;
> +       }
> +       n = nbyte & (SECSIZ - 1);
> +       if (nbyte -= n) {
> +           memcpy(s, fat.buf + secbyt(bytsec(offset)), nbyte);
> +           offset += nbyte;
> +           s += nbyte;
> +       }
> +       if (n)
> +           memcpy(s, fat.buf + secbyt(bytsec(offset)), n);
> +    }
>
> -    err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
> -                 fs->fatsz != 32 ? 2 : 4);
> -    if (err)
> -        return err;
>      x = fs->fatsz != 32 ? cv2(buf) : cv4(buf);
>      *c = fs->fatsz == 12 ? *c & 1 ? x >> 4 : x & 0xfff : x;
> -    return 0;
> +    return (0);
>  }
>
>  /*
> @@ -739,42 +831,24 @@ ioread(DOS_FS *fs, u_int offset, void *b
>      s = buf;
>      if ((off = offset & (SECSIZ - 1))) {
>          offset -= off;
> -        if ((err = iobuf(fs, bytsec(offset))))
> -            return err;
> -        offset += SECSIZ;
>          if ((n = SECSIZ - off) > nbyte)
>              n = nbyte;
> -        memcpy(s, fs->buf + off, n);
> +        if ((err = ioget(fs->fd, bytsec(offset), off, s, n)))
> +            return err;
> +        offset += SECSIZ;
>          s += n;
>          nbyte -= n;
>      }
>      n = nbyte & (SECSIZ - 1);
>      if (nbyte -= n) {
> -        if ((err = ioget(fs->fd, bytsec(offset), s, bytsec(nbyte))))
> +        if ((err = ioget(fs->fd, bytsec(offset), 0, s, nbyte)))
>              return err;
>          offset += nbyte;
>          s += nbyte;
>      }
>      if (n) {
> -        if ((err = iobuf(fs, bytsec(offset))))
> -            return err;
> -        memcpy(s, fs->buf, n);
> -    }
> -    return 0;
> -}
> -
> -/*
> - * Buffered sector-based I/O primitive
> - */
> -static int
> -iobuf(DOS_FS *fs, u_int lsec)
> -{
> -    int err;
> -
> -    if (fs->bufsec != lsec) {
> -        if ((err = ioget(fs->fd, lsec, fs->buf, 1)))
> +        if ((err = ioget(fs->fd, bytsec(offset), 0, s, n)))
>              return err;
> -        fs->bufsec = lsec;
>      }
>      return 0;
>  }
> @@ -783,13 +857,8 @@ iobuf(DOS_FS *fs, u_int lsec)
>   * Sector-based I/O primitive
>   */
>  static int
> -ioget(struct open_file *fd, u_int lsec, void *buf, u_int nsec)
> +ioget(struct open_file *fd, daddr_t lsec, size_t offset, void *buf, u_int
> size)
>  {
> -    int        err;
> -
> -    twiddle(1);
> -    if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec,
> -                                       secbyt(nsec), buf, NULL)))
> -       return(err);
> -    return(0);
> +    return ((fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, offset,
> +       size, buf, NULL));
>  }
>
> Modified: head/lib/libstand/dosfs.h
>
> ==============================================================================
> --- head/lib/libstand/dosfs.h   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/dosfs.h   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -96,8 +96,6 @@ typedef union {
>
>  typedef struct {
>      struct open_file *fd;       /* file descriptor */
> -    u_char *buf;                /* buffer */
> -    u_int bufsec;               /* buffered sector */
>      u_int links;                /* active links to structure */
>      u_int spc;                  /* sectors per cluster */
>      u_int bsize;                /* cluster size in bytes */
>
> Modified: head/lib/libstand/ext2fs.c
>
> ==============================================================================
> --- head/lib/libstand/ext2fs.c  Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/ext2fs.c  Mon Apr 18 23:09:22 2016        (r298230)
> @@ -355,7 +355,7 @@ ext2fs_open(const char *upath, struct op
>         fp->f_fs = fs;
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size);
> +           EXT2_SBLOCK, 0, EXT2_SBSIZE, (char *)fs, &buf_size);
>         if (error)
>                 goto out;
>
> @@ -397,7 +397,7 @@ ext2fs_open(const char *upath, struct op
>         fp->f_bg = malloc(len);
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len,
> +           EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, 0, len,
>             (char *)fp->f_bg, &buf_size);
>         if (error)
>                 goto out;
> @@ -509,7 +509,7 @@ ext2fs_open(const char *upath, struct op
>
>                                 twiddle(1);
>                                 error =
> (f->f_dev->dv_strategy)(f->f_devdata,
> -                                   F_READ, fsb_to_db(fs, disk_block),
> +                                   F_READ, fsb_to_db(fs, disk_block), 0,
>                                     fs->fs_bsize, buf, &buf_size);
>                                 if (error)
>                                         goto out;
> @@ -570,7 +570,7 @@ read_inode(ino_t inumber, struct open_fi
>         buf = malloc(fs->fs_bsize);
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize);
> +           ino_to_db(fs, fp->f_bg, inumber), 0, fs->fs_bsize, buf,
> &rsize);
>         if (error)
>                 goto out;
>         if (rsize != fs->fs_bsize) {
> @@ -667,7 +667,7 @@ block_map(struct open_file *f, daddr_t f
>                                         malloc(fs->fs_bsize);
>                         twiddle(1);
>                         error = (f->f_dev->dv_strategy)(f->f_devdata,
> F_READ,
> -                           fsb_to_db(fp->f_fs, ind_block_num),
> fs->fs_bsize,
> +                           fsb_to_db(fp->f_fs, ind_block_num), 0,
> fs->fs_bsize,
>                             fp->f_blk[level], &fp->f_blksize[level]);
>                         if (error)
>                                 return (error);
> @@ -725,7 +725,7 @@ buf_read_file(struct open_file *f, char
>                 } else {
>                         twiddle(4);
>                         error = (f->f_dev->dv_strategy)(f->f_devdata,
> F_READ,
> -                           fsb_to_db(fs, disk_block), block_size,
> +                           fsb_to_db(fs, disk_block), 0, block_size,
>                             fp->f_buf, &fp->f_buf_size);
>                         if (error)
>                                 goto done;
>
> Modified: head/lib/libstand/read.c
>
> ==============================================================================
> --- head/lib/libstand/read.c    Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/read.c    Mon Apr 18 23:09:22 2016        (r298230)
> @@ -79,7 +79,7 @@ read(int fd, void *dest, size_t bcount)
>      if (f->f_flags & F_RAW) {
>         twiddle(4);
>         errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                                       btodb(f->f_offset), bcount, dest,
> &resid);
> +                               btodb(f->f_offset), 0, bcount, dest,
> &resid);
>         if (errno)
>             return (-1);
>         f->f_offset += resid;
>
> Modified: head/lib/libstand/stand.h
>
> ==============================================================================
> --- head/lib/libstand/stand.h   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/stand.h   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -138,8 +138,8 @@ struct devsw {
>      const char dv_name[8];
>      int                dv_type;                /* opaque type constant,
> arch-dependant */
>      int                (*dv_init)(void);       /* early probe call */
> -    int                (*dv_strategy)(void *devdata, int rw, daddr_t blk,
> size_t size,
> -                              char *buf, size_t *rsize);
> +    int                (*dv_strategy)(void *devdata, int rw, daddr_t blk,
> +                       size_t offset, size_t size, char *buf, size_t
> *rsize);
>      int                (*dv_open)(struct open_file *f, ...);
>      int                (*dv_close)(struct open_file *f);
>      int                (*dv_ioctl)(struct open_file *f, u_long cmd, void
> *data);
> @@ -154,6 +154,24 @@ extern struct devsw netdev;
>
>  extern int errno;
>
> +/*
> + * Generic device specifier; architecture-dependent
> + * versions may be larger, but should be allowed to
> + * overlap.
> + */
> +struct devdesc
> +{
> +    struct devsw       *d_dev;
> +    int                        d_type;
> +#define DEVT_NONE      0
> +#define DEVT_DISK      1
> +#define DEVT_NET       2
> +#define DEVT_CD                3
> +#define DEVT_ZFS       4
> +    int                        d_unit;
> +    void               *d_opendata;
> +};
> +
>  struct open_file {
>      int                        f_flags;        /* see F_* below */
>      struct devsw       *f_dev;         /* pointer to device operations */
>
> Modified: head/lib/libstand/ufs.c
>
> ==============================================================================
> --- head/lib/libstand/ufs.c     Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/ufs.c     Mon Apr 18 23:09:22 2016        (r298230)
> @@ -157,7 +157,7 @@ read_inode(inumber, f)
>         buf = malloc(fs->fs_bsize);
>         twiddle(1);
>         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -               fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize,
> +               fsbtodb(fs, ino_to_fsba(fs, inumber)), 0, fs->fs_bsize,
>                 buf, &rsize);
>         if (rc)
>                 goto out;
> @@ -267,7 +267,7 @@ block_map(f, file_block, disk_block_p)
>                                         malloc(fs->fs_bsize);
>                         twiddle(1);
>                         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                               fsbtodb(fp->f_fs, ind_block_num),
> +                               fsbtodb(fp->f_fs, ind_block_num), 0,
>                                 fs->fs_bsize,
>                                 fp->f_blk[level],
>                                 &fp->f_blksize[level]);
> @@ -348,7 +348,7 @@ buf_write_file(f, buf_p, size_p)
>
>                 twiddle(4);
>                 rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                       fsbtodb(fs, disk_block),
> +                       fsbtodb(fs, disk_block), 0,
>                         block_size, fp->f_buf, &fp->f_buf_size);
>                 if (rc)
>                         return (rc);
> @@ -367,7 +367,7 @@ buf_write_file(f, buf_p, size_p)
>
>         twiddle(4);
>         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
> -               fsbtodb(fs, disk_block),
> +               fsbtodb(fs, disk_block), 0,
>                 block_size, fp->f_buf, &fp->f_buf_size);
>         return (rc);
>  }
> @@ -408,7 +408,7 @@ buf_read_file(f, buf_p, size_p)
>                 } else {
>                         twiddle(4);
>                         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                               fsbtodb(fs, disk_block),
> +                               fsbtodb(fs, disk_block), 0,
>                                 block_size, fp->f_buf, &fp->f_buf_size);
>                         if (rc)
>                                 return (rc);
> @@ -521,7 +521,7 @@ ufs_open(upath, f)
>          */
>         for (i = 0; sblock_try[i] != -1; i++) {
>                 rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                   sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
> +                   sblock_try[i] / DEV_BSIZE, 0, SBLOCKSIZE,
>                     (char *)fs, &buf_size);
>                 if (rc)
>                         goto out;
> @@ -651,7 +651,7 @@ ufs_open(upath, f)
>
>                                 twiddle(1);
>                                 rc = (f->f_dev->dv_strategy)(f->f_devdata,
> -                                       F_READ, fsbtodb(fs, disk_block),
> +                                       F_READ, fsbtodb(fs, disk_block), 0,
>                                         fs->fs_bsize, buf, &buf_size);
>                                 if (rc)
>                                         goto out;
>
> Modified: head/lib/libstand/write.c
>
> ==============================================================================
> --- head/lib/libstand/write.c   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/write.c   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -82,7 +82,7 @@ write(fd, dest, bcount)
>         if (f->f_flags & F_RAW) {
>                 twiddle(4);
>                 errno = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
> -                       btodb(f->f_offset), bcount, dest, &resid);
> +                       btodb(f->f_offset), 0, bcount, dest, &resid);
>                 if (errno)
>                         return (-1);
>                 f->f_offset += resid;
>
> Modified: head/sys/boot/common/bcache.c
>
> ==============================================================================
> --- head/sys/boot/common/bcache.c       Mon Apr 18 22:00:26 2016
> (r298229)
> +++ head/sys/boot/common/bcache.c       Mon Apr 18 23:09:22 2016
> (r298230)
> @@ -1,5 +1,6 @@
>  /*-
>   * Copyright (c) 1998 Michael Smith <msmith at freebsd.org>
> + * Copyright 2015 Toomas Soome <tsoome at me.com>
>   * All rights reserved.
>   *
>   * Redistribution and use in source and binary forms, with or without
> @@ -25,99 +26,155 @@
>   */
>
>  #include <sys/cdefs.h>
> +#include <sys/param.h>
>  __FBSDID("$FreeBSD$");
>
>  /*
> - * Simple LRU block cache
> + * Simple hashed block cache
>   */
>
>  #include <sys/stdint.h>
>
>  #include <stand.h>
>  #include <string.h>
> -#include <bitstring.h>
> +#include <strings.h>
>
>  #include "bootstrap.h"
>
>  /* #define BCACHE_DEBUG */
>
>  #ifdef BCACHE_DEBUG
> -#define BCACHE_TIMEOUT 10
>  # define DEBUG(fmt, args...)   printf("%s: " fmt "\n" , __func__ , ##
> args)
>  #else
> -#define BCACHE_TIMEOUT 2
>  # define DEBUG(fmt, args...)
>  #endif
>
> -
>  struct bcachectl
>  {
>      daddr_t    bc_blkno;
> -    time_t     bc_stamp;
>      int                bc_count;
>  };
>
> -static struct bcachectl        *bcache_ctl;
> -static caddr_t         bcache_data;
> -static bitstr_t                *bcache_miss;
> -static u_int           bcache_nblks;
> -static u_int           bcache_blksize;
> -static u_int           bcache_hits, bcache_misses, bcache_ops,
> bcache_bypasses;
> -static u_int           bcache_flushes;
> -static u_int           bcache_bcount;
> -
> -static void    bcache_invalidate(daddr_t blkno);
> -static void    bcache_insert(caddr_t buf, daddr_t blkno);
> -static int     bcache_lookup(caddr_t buf, daddr_t blkno);
> +/*
> + * bcache per device node. cache is allocated on device first open and
> freed
> + * on last close, to save memory. The issue there is the size; biosdisk
> + * supports up to 31 (0x1f) devices. Classic setup would use single disk
> + * to boot from, but this has changed with zfs.
> + */
> +struct bcache {
> +    struct bcachectl   *bcache_ctl;
> +    caddr_t            bcache_data;
> +    u_int              bcache_nblks;
> +    size_t             ra;
> +};
> +
> +static u_int bcache_total_nblks;       /* set by bcache_init */
> +static u_int bcache_blksize;           /* set by bcache_init */
> +static u_int bcache_numdev;            /* set by bcache_add_dev */
> +/* statistics */
> +static u_int bcache_units;     /* number of devices with cache */
> +static u_int bcache_unit_nblks;        /* nblocks per unit */
> +static u_int bcache_hits;
> +static u_int bcache_misses;
> +static u_int bcache_ops;
> +static u_int bcache_bypasses;
> +static u_int bcache_bcount;
> +static u_int bcache_rablks;
> +
> +#define        BHASH(bc, blkno)        ((blkno) & ((bc)->bcache_nblks -
> 1))
> +#define        BCACHE_LOOKUP(bc, blkno)        \
> +       ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
> +#define        BCACHE_READAHEAD        256
> +#define        BCACHE_MINREADAHEAD     32
> +
> +static void    bcache_invalidate(struct bcache *bc, daddr_t blkno);
> +static void    bcache_insert(struct bcache *bc, daddr_t blkno);
> +static void    bcache_free_instance(struct bcache *bc);
>
>  /*
>   * Initialise the cache for (nblks) of (bsize).
>   */
> -int
> +void
>  bcache_init(u_int nblks, size_t bsize)
>  {
> -    /* discard any old contents */
> -    if (bcache_data != NULL) {
> -       free(bcache_data);
> -       bcache_data = NULL;
> -       free(bcache_ctl);
> -    }
> -
> -    /* Allocate control structures */
> -    bcache_nblks = nblks;
> +    /* set up control data */
> +    bcache_total_nblks = nblks;
>      bcache_blksize = bsize;
> -    bcache_data = malloc(bcache_nblks * bcache_blksize);
> -    bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct
> bcachectl));
> -    bcache_miss = bit_alloc((bcache_nblks + 1) / 2);
> -    if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss ==
> NULL)) {
> -       if (bcache_miss)
> -           free(bcache_miss);
> -       if (bcache_ctl)
> -           free(bcache_ctl);
> -       if (bcache_data)
> -           free(bcache_data);
> -       bcache_data = NULL;
> -       return(ENOMEM);
> -    }
> -
> -    return(0);
>  }
>
>  /*
> - * Flush the cache
> + * add number of devices to bcache. we have to divide cache space
> + * between the devices, so bcache_add_dev() can be used to set up the
> + * number. The issue is, we need to get the number before actual allocations.
> + * bcache_add_dev() is supposed to be called from device init() call, so the
> + * assumption is, devsw dv_init is called for plain devices first, and
> + * for zfs, last.
>   */
>  void
> -bcache_flush(void)
> +bcache_add_dev(int devices)
>  {
> -    u_int      i;
> +    bcache_numdev += devices;
> +}
>
> -    bcache_flushes++;
> +void *
> +bcache_allocate(void)
> +{
> +    u_int i;
> +    struct bcache *bc = malloc(sizeof (struct bcache));
> +    int disks = bcache_numdev;
> +
> +    if (disks == 0)
> +       disks = 1;      /* safe guard */
> +
> +    if (bc == NULL) {
> +       errno = ENOMEM;
> +       return (bc);
> +    }
> +
> +    /*
> +     * the bcache block count must be power of 2 for hash function
> +     */
> +    i = fls(disks) - 1;                /* highbit - 1 */
> +    if (disks > (1 << i))      /* next power of 2 */
> +       i++;
> +
> +    bc->bcache_nblks = bcache_total_nblks >> i;
> +    bcache_unit_nblks = bc->bcache_nblks;
> +    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
> +    if (bc->bcache_data == NULL) {
> +       /* dont error out yet. fall back to 32 blocks and try again */
> +       bc->bcache_nblks = 32;
> +       bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
> +    }
> +
> +    bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
> +
> +    if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
> +       bcache_free_instance(bc);
> +       errno = ENOMEM;
> +       return(NULL);
> +    }
>
>      /* Flush the cache */
> -    for (i = 0; i < bcache_nblks; i++) {
> -       bcache_ctl[i].bc_count = -1;
> -       bcache_ctl[i].bc_blkno = -1;
> -    }
> +    for (i = 0; i < bc->bcache_nblks; i++) {
> +       bc->bcache_ctl[i].bc_count = -1;
> +       bc->bcache_ctl[i].bc_blkno = -1;
> +    }
> +    bcache_units++;
> +    bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */
> +    return (bc);
> +}
> +
> +void
> +bcache_free(void *cache)
> +{
> +    struct bcache *bc = cache;
> +
> +    if (bc == NULL)
> +       return;
> +
> +    bcache_free_instance(bc);
> +    bcache_units--;
>  }
>
>  /*
> @@ -125,31 +182,22 @@ bcache_flush(void)
>   * cache with the new values.
>   */
>  static int
> -write_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +write_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> +    struct bcache              *bc = dd->dv_cache;
>      daddr_t                    i, nblk;
> -    int                                err;
>
>      nblk = size / bcache_blksize;
>
>      /* Invalidate the blocks being written */
>      for (i = 0; i < nblk; i++) {
> -       bcache_invalidate(blk + i);
> +       bcache_invalidate(bc, blk + i);
>      }
>
>      /* Write the blocks */
> -    err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize);
> -
> -    /* Populate the block cache with the new data */
> -    if (err == 0) {
> -       for (i = 0; i < nblk; i++) {
> -           bcache_insert(buf + (i * bcache_blksize),blk + i);
> -       }
> -    }
> -
> -    return err;
> +    return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize));
>  }
>
>  /*
> @@ -158,61 +206,87 @@ write_strategy(void *devdata, int unit,
>   * device I/O and then use the I/O results to populate the cache.
>   */
>  static int
> -read_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +read_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> -    int                                p_size, result;
> -    daddr_t                    p_blk, i, j, nblk;
> +    struct bcache              *bc = dd->dv_cache;
> +    size_t                     i, nblk, p_size, r_size, complete, ra;
> +    int                                result;
> +    daddr_t                    p_blk;
>      caddr_t                    p_buf;
>
> +    if (bc == NULL) {
> +       errno = ENODEV;
> +       return (-1);
> +    }
> +
> +    if (rsize != NULL)
> +       *rsize = 0;
> +
>      nblk = size / bcache_blksize;
> +    if ((nblk == 0 && size != 0) || offset != 0)
> +       nblk++;
>      result = 0;
> +    complete = 1;
>
> -    /* Satisfy any cache hits up front */
> +    /* Satisfy any cache hits up front, break on first miss */
>      for (i = 0; i < nblk; i++) {
> -       if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) {
> -           bit_set(bcache_miss, i);    /* cache miss */
> -           bcache_misses++;
> +       if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
> +           bcache_misses += (nblk - i);
> +           complete = 0;
> +           if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
> +               bc->ra >>= 1;   /* reduce read ahead */
> +           break;
>         } else {
> -           bit_clear(bcache_miss, i);  /* cache hit */
>             bcache_hits++;
>         }
>      }
>
> -    /* Go back and fill in any misses  XXX optimise */
> -    p_blk = -1;
> -    p_buf = NULL;
> -    p_size = 0;
> -    for (i = 0; i < nblk; i++) {
> -       if (bit_test(bcache_miss, i)) {
> -           /* miss, add to pending transfer */
> -           if (p_blk == -1) {
> -               p_blk = blk + i;
> -               p_buf = buf + (bcache_blksize * i);
> -               p_size = 1;
> -           } else {
> -               p_size++;
> -           }
> -       } else if (p_blk != -1) {
> -           /* hit, complete pending transfer */
> -           result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
> -           if (result != 0)
> -               goto done;
> -           for (j = 0; j < p_size; j++)
> -               bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
> -           p_blk = -1;
> -       }
> -    }
> -    if (p_blk != -1) {
> -       /* pending transfer left */
> -       result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
> -       if (result != 0)
> -           goto done;
> -       for (j = 0; j < p_size; j++)
> -           bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
> -    }
> -
> +   if (complete) {     /* whole set was in cache, return it */
> +       if (bc->ra < BCACHE_READAHEAD)
> +               bc->ra <<= 1;   /* increase read ahead */
> +       bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset,
> +           buf, size);
> +       goto done;
> +   }
> +
> +    /*
> +     * Fill in any misses. From check we have i pointing to first missing
> +     * block, read in all remaining blocks + readahead.
> +     * We have space at least for nblk - i before bcache wraps.
> +     */
> +    p_blk = blk + i;
> +    p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
> +    r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
> +
> +    p_size = MIN(r_size, nblk - i);    /* read at least those blocks */
> +
> +    ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
> +    if (ra != bc->bcache_nblks) { /* do we have RA space? */
> +       ra = MIN(bc->ra, ra);
> +       p_size += ra;
> +    }
> +
> +    /* invalidate bcache */
> +    for (i = 0; i < p_size; i++) {
> +       bcache_invalidate(bc, p_blk + i);
> +    }
> +    r_size = 0;
> +    result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0,
> +       p_size * bcache_blksize, p_buf, &r_size);
> +
> +    if (result)
> +       goto done;
> +
> +    r_size /= bcache_blksize;
> +    for (i = 0; i < r_size; i++)
> +       bcache_insert(bc, p_blk + i);
> +
> +    bcache_rablks += ra;
> +    bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf,
> +       size);
> +
>   done:
>      if ((result == 0) && (rsize != NULL))
>         *rsize = size;
> @@ -220,130 +294,144 @@ read_strategy(void *devdata, int unit, i
>  }
>
>  /*
> - * Requests larger than 1/2 the cache size will be bypassed and go
> + * Requests larger than 1/2 cache size will be bypassed and go
>   * directly to the disk.  XXX tune this.
>   */
>  int
> -bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
> -    static int                 bcache_unit = -1;
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> +    struct bcache              *bc = dd->dv_cache;
> +    u_int bcache_nblks = 0;
> +    int nblk, cblk, ret;
> +    size_t csize, isize, total;
>
>      bcache_ops++;
>
> -    if(bcache_unit != unit) {
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
> _______________________________________________
> svn-src-head at freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe at freebsd.org"
>
-- 
-Howard


More information about the svn-src-head mailing list