svn commit: r272233 - user/marcel/mkimg
Marcel Moolenaar
marcel at FreeBSD.org
Sun Sep 28 00:43:05 UTC 2014
Author: marcel
Date: Sun Sep 28 00:43:04 2014
New Revision: 272233
URL: http://svnweb.freebsd.org/changeset/base/272233
Log:
Start performance optimizations:
Output formats typically need to know whether a sequence of blocks has data
or not. They use this to determine whether to allocate disk space for them
or not. The image_data() function provides that functionality, but is
implemented by reading the requested blocks from the temporary file and
comparing them with zeroes. The QCOW format needs to go over the image 3
times and every time we read from the temporary file.
We can speed this up by building a "chunk" list in memory while we read the
partition data. Each chunk is a sequence of blocks that is either defined
as a gap (i.e. all zeroes) or defined as containing data. For each chunk we
keep track of the file and the offset in that file where the chunk's data
comes from. This allows us to handle regular files and in particular sparse
files more optimally.
For sparse files we can trivially build a chunk for each of the holes in
the sparse file by using SEEK_HOLE and SEEK_DATA. The data regions still
need to be read to handle zero blocks for block sizes smaller than the
underlying file system's block size. However, we don't have to copy the
data into a temporary file anymore.
For anything but regular files, we still use the temporary file. We call it
a swap file now. With that all data can be mapped and unmapped as we need
to access it.
This commit implements the creation of the chunk list and the swap file
usage for non-regular files (i.e. streams). Mappable files are now handled
like streams, so that needs some work.
The big part that is missing is the use of the chunk list for determining
whether a sequence of blocks has data and all the handling of writing the
image data to the output file.
As such: this commit breaks mkimg and makes it useless. It's a good WIP
to save though -- hence doing it on my branch.
Modified:
user/marcel/mkimg/image.c
Modified: user/marcel/mkimg/image.c
==============================================================================
--- user/marcel/mkimg/image.c Sun Sep 28 00:24:01 2014 (r272232)
+++ user/marcel/mkimg/image.c Sun Sep 28 00:43:04 2014 (r272233)
@@ -27,67 +27,303 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <paths.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <unistd.h>
#include "image.h"
#include "mkimg.h"
-#define BUFFER_SIZE (1024*1024)
+struct chunk {
+ lba_t ch_block; /* Block address in image. */
+ off_t ch_ofs; /* Offset in backing file. */
+ STAILQ_ENTRY(chunk) ch_list;
+ size_t ch_size; /* Size of chunk in bytes. */
+ int ch_fd; /* FD of backing file. */
+ u_int ch_flags;
+#define CH_FLAGS_GAP 1 /* Chunk is a gap (no FD). */
+#define CH_FLAGS_DIRTY 2 /* Data modified/only in memory. */
+};
+
+static STAILQ_HEAD(chunk_head, chunk) image_chunks;
+static u_int image_nchunks;
+
+static char image_swap_file[PATH_MAX];
+static int image_swap_fd = -1;
+static u_int image_swap_pgsz;
+static off_t image_swap_size;
-static char image_tmpfile[PATH_MAX];
-static int image_fd = -1;
static lba_t image_size;
+/*
+ * Swap file handlng.
+ */
+
+static off_t
+image_swap_alloc(size_t size)
+{
+ off_t ofs;
+ size_t unit;
+
+ unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+ assert((unit & (unit - 1)) == 0);
+
+ size = (size + unit - 1) & ~(unit - 1);
+
+ ofs = image_swap_size;
+ image_swap_size += size;
+ if (ftruncate(image_swap_fd, image_swap_size) == -1) {
+ image_swap_size = ofs;
+ ofs = -1LL;
+ }
+ fprintf(stderr, "SWAP: off=%jd, size=%zu\n", (intmax_t)ofs, size);
+ return (ofs);
+}
+
+/*
+ * Image chunk handling.
+ */
+
static void
-cleanup(void)
+image_chunk_dump(void)
{
+ struct chunk *ch;
- if (image_fd != -1)
- close(image_fd);
- unlink(image_tmpfile);
+ fprintf(stderr, "%u chunks:\n", image_nchunks);
+ STAILQ_FOREACH(ch, &image_chunks, ch_list) {
+ fprintf(stderr, "\tblk=%jd, ofs=%jd, fd=%d, sz=%zu, fl=%u\n",
+ (intmax_t)ch->ch_block, (intmax_t)ch->ch_ofs, ch->ch_fd,
+ ch->ch_size, ch->ch_flags);
+ }
}
-int
-image_copyin(lba_t blk, int fd, uint64_t *sizep)
+static size_t
+image_chunk_grow(struct chunk *ch, size_t sz)
+{
+ size_t dsz, newsz;
+
+ newsz = ch->ch_size + sz;
+ if (newsz > ch->ch_size) {
+ ch->ch_size = newsz;
+ return (0);
+ }
+ /* We would overflow -- create new chunk for remainder. */
+ dsz = SIZE_MAX - ch->ch_size;
+ assert(dsz < sz);
+ ch->ch_size = SIZE_MAX;
+ return (sz - dsz);
+}
+
+static int
+image_chunk_skipto(lba_t to)
+{
+ struct chunk *ch;
+ lba_t from;
+ size_t sz;
+
+ ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
+ from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
+
+ assert(from <= to);
+
+ /* Nothing to do? */
+ if (from == to)
+ return (0);
+ /* Avoid bugs due to overflows. */
+ if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
+ return (EFBIG);
+ sz = (to - from) * secsz;
+ if (ch != NULL && (ch->ch_flags & CH_FLAGS_GAP)) {
+ sz = image_chunk_grow(ch, sz);
+ if (sz == 0)
+ return (0);
+ from = ch->ch_block + (ch->ch_size / secsz);
+ }
+ ch = malloc(sizeof(*ch));
+ if (ch == NULL)
+ return (ENOMEM);
+ memset(ch, 0, sizeof(*ch));
+ ch->ch_block = from;
+ ch->ch_size = sz;
+ ch->ch_fd = -1;
+ ch->ch_flags |= CH_FLAGS_GAP;
+ STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
+ image_nchunks++;
+ return (0);
+}
+
+static int
+image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
+{
+ struct chunk *ch;
+
+ ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
+ if (ch != NULL && (ch->ch_flags & CH_FLAGS_GAP) == 0) {
+ if (fd == ch->ch_fd &&
+ blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
+ ofs == (off_t)(ch->ch_ofs + ch->ch_size)) {
+ sz = image_chunk_grow(ch, sz);
+ if (sz == 0)
+ return (0);
+ blk = ch->ch_block + (ch->ch_size / secsz);
+ ofs = ch->ch_ofs + ch->ch_size;
+ }
+ }
+ ch = malloc(sizeof(*ch));
+ if (ch == NULL)
+ return (ENOMEM);
+ memset(ch, 0, sizeof(*ch));
+ ch->ch_block = blk;
+ ch->ch_ofs = ofs;
+ ch->ch_size = sz;
+ ch->ch_fd = fd;
+ STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
+ image_nchunks++;
+ return (0);
+}
+
+static int
+image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
+{
+ uint64_t *p = buf;
+ size_t n;
+ int error;
+
+ assert(((uintptr_t)p & 3) == 0);
+
+ error = 0;
+ sz = (sz + secsz - 1) & ~(secsz - 1);
+ while (!error && sz > 0) {
+ n = 0;
+ while (n < (secsz >> 3) && p[n] == 0)
+ n++;
+ if (n == (secsz >> 3))
+ error = image_chunk_skipto(blk + 1);
+ else
+ error = image_chunk_append(blk, secsz, ofs, fd);
+ blk++;
+ p += (secsz >> 3);
+ sz -= secsz;
+ ofs += secsz;
+ }
+ return (error);
+}
+
+/*
+ * File mapping support.
+ */
+
+static void *
+image_file_map(int fd, off_t ofs, size_t sz)
+{
+ void *ptr;
+ size_t unit;
+ int flags, prot;
+
+ unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+ assert((unit & (unit - 1)) == 0);
+
+ flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
+ /* Allow writing to our swap file only. */
+ prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
+ sz = (sz + unit - 1) & ~(unit - 1);
+ ptr = mmap(NULL, sz, prot, flags, fd, ofs);
+ return ((ptr == MAP_FAILED) ? NULL : ptr);
+}
+
+static int
+image_file_unmap(void *buffer, size_t sz)
+{
+ size_t unit;
+
+ unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+ sz = (sz + unit - 1) & ~(unit - 1);
+ munmap(buffer, sz);
+ return (0);
+}
+
+/*
+ * Input/source file handling.
+ */
+
+static int
+image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
{
char *buffer;
uint64_t bytesize;
- ssize_t bcnt, rdsz;
- int error, partial;
+ off_t swofs;
+ size_t iosz;
+ ssize_t rdsz;
+ int error;
- assert(BUFFER_SIZE % secsz == 0);
+ /*
+ * This makes sure we're doing I/O in multiples of the page
+ * size as well as of the sector size. 2MB is the minimum
+ * by virtue of secsz at least 512 bytes and the page size
+ * at least 4K bytes.
+ */
+ iosz = secsz * image_swap_pgsz;
- buffer = malloc(BUFFER_SIZE);
- if (buffer == NULL)
- return (ENOMEM);
bytesize = 0;
- partial = 0;
- while (1) {
- rdsz = read(fd, buffer, BUFFER_SIZE);
- if (rdsz <= 0) {
- error = (rdsz < 0) ? errno : 0;
- break;
- }
- if (partial)
- abort();
- bytesize += rdsz;
- bcnt = (rdsz + secsz - 1) / secsz;
- error = image_write(blk, buffer, bcnt);
+ do {
+ swofs = image_swap_alloc(iosz);
+ if (swofs == -1LL)
+ return (errno);
+ buffer = image_file_map(image_swap_fd, swofs, iosz);
+ if (buffer == NULL)
+ return (errno);
+ rdsz = read(fd, buffer, iosz);
+ if (rdsz > 0)
+ error = image_chunk_copyin(blk, buffer, rdsz, swofs,
+ image_swap_fd);
+ else if (rdsz < 0)
+ error = errno;
+ else
+ error = 0;
+ image_file_unmap(buffer, iosz);
+ /* XXX should we relinguish unused swap space? */
if (error)
- break;
- blk += bcnt;
- partial = ((ssize_t)(bcnt * secsz) != rdsz) ? 1 : 0;
- }
- free(buffer);
+ return (error);
+
+ bytesize += rdsz;
+ blk += (rdsz + secsz - 1) / secsz;
+ } while (rdsz > 0);
+
if (sizep != NULL)
*sizep = bytesize;
+ return (0);
+}
+
+static int
+image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
+{
+
+ return (image_copyin_stream(blk, fd, sizep));
+}
+
+int
+image_copyin(lba_t blk, int fd, uint64_t *sizep)
+{
+ struct stat sb;
+ int error;
+
+ error = image_chunk_skipto(blk);
+ if (!error) {
+ if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
+ error = image_copyin_stream(blk, fd, sizep);
+ else
+ error = image_copyin_mapped(blk, fd, sizep);
+ }
return (error);
}
@@ -120,23 +356,25 @@ image_copyout_region(int fd, lba_t blk,
{
char *buffer;
off_t ofs;
- size_t sz;
+ size_t bufsz, sz;
ssize_t rdsz, wrsz;
int error;
+ bufsz = secsz * image_swap_pgsz;
+
ofs = lseek(fd, 0L, SEEK_CUR);
blk *= secsz;
- if (lseek(image_fd, blk, SEEK_SET) != blk)
+ if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
return (errno);
- buffer = malloc(BUFFER_SIZE);
+ buffer = malloc(bufsz);
if (buffer == NULL)
return (errno);
error = 0;
size *= secsz;
while (size > 0) {
- sz = (BUFFER_SIZE < size) ? BUFFER_SIZE : size;
- rdsz = read(image_fd, buffer, sz);
+ sz = ((ssize_t)bufsz < size) ? bufsz : (size_t)size;
+ rdsz = read(image_swap_fd, buffer, sz);
if (rdsz <= 0) {
error = (rdsz < 0) ? errno : 0;
break;
@@ -161,7 +399,7 @@ image_data(lba_t blk, lba_t size)
char *buffer, *p;
blk *= secsz;
- if (lseek(image_fd, blk, SEEK_SET) != blk)
+ if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
return (1);
size *= secsz;
@@ -169,7 +407,7 @@ image_data(lba_t blk, lba_t size)
if (buffer == NULL)
return (1);
- if (read(image_fd, buffer, size) != (ssize_t)size) {
+ if (read(image_swap_fd, buffer, size) != (ssize_t)size) {
free(buffer);
return (1);
}
@@ -185,7 +423,12 @@ image_data(lba_t blk, lba_t size)
lba_t
image_get_size(void)
{
+ static int once = 0;
+ if (once == 0) {
+ once++;
+ image_chunk_dump();
+ }
return (image_size);
}
@@ -193,8 +436,10 @@ int
image_set_size(lba_t blk)
{
+ image_chunk_skipto(blk);
+
image_size = blk;
- if (ftruncate(image_fd, blk * secsz) == -1)
+ if (ftruncate(image_swap_fd, blk * secsz) == -1)
return (errno);
return (0);
}
@@ -204,27 +449,42 @@ image_write(lba_t blk, void *buf, ssize_
{
blk *= secsz;
- if (lseek(image_fd, blk, SEEK_SET) != blk)
+ if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
return (errno);
len *= secsz;
- if (sparse_write(image_fd, buf, len) != len)
+ if (sparse_write(image_swap_fd, buf, len) != len)
return (errno);
return (0);
}
+static void
+image_cleanup(void)
+{
+
+ if (image_swap_fd != -1)
+ close(image_swap_fd);
+ unlink(image_swap_file);
+}
+
int
image_init(void)
{
const char *tmpdir;
- if (atexit(cleanup) == -1)
+ STAILQ_INIT(&image_chunks);
+ image_nchunks = 0;
+
+ image_swap_size = 0;
+ image_swap_pgsz = getpagesize();
+
+ if (atexit(image_cleanup) == -1)
return (errno);
if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
tmpdir = _PATH_TMP;
- snprintf(image_tmpfile, sizeof(image_tmpfile), "%s/mkimg-XXXXXX",
+ snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
tmpdir);
- image_fd = mkstemp(image_tmpfile);
- if (image_fd == -1)
+ image_swap_fd = mkstemp(image_swap_file);
+ if (image_swap_fd == -1)
return (errno);
return (0);
}
More information about the svn-src-user
mailing list