svn commit: r272233 - user/marcel/mkimg

Marcel Moolenaar marcel at FreeBSD.org
Sun Sep 28 00:43:05 UTC 2014


Author: marcel
Date: Sun Sep 28 00:43:04 2014
New Revision: 272233
URL: http://svnweb.freebsd.org/changeset/base/272233

Log:
  Start performance optimizations:
  Output formats typically need to know whether a sequence of blocks has data
  or not. They use this to determine whether to allocate disk space for them
  or not. The image_data() function provides that functionality, but is
  implemented by reading the amount of blocks from the temporary file and
  comparing that with zeroes. The QCOW format needs to go over the image 3
  times and every time we read from the temporary file.
  
  We can speed this up by building a "chunk" list in memory while we read the
  partition data. Each chunk is a sequence of blocks that is either defined
  as a gap (i.e. all zeroes) or defined as containing data. For each chunk we
  keep track of the file and the offset in that file where the chunk's data
  comes from. This allows us to handle regular files and in particular sparse
  files more optimally.
  
  For sparse files we can trivially build a chunk for each of the holes in
  the sparse file by using SEEK_HOLE and SEEK_DATA. The data regions still
  need to be read to handle zero blocks for block sizes smaller than the
  underlying file system's block size. However, we don't have to copy the
  data into a temporary file anymore.
  
  For anything but regular files, we still use the temporary file. We call it
  a swap file now. With that all data can be mapped and unmapped as we need
  to access it.
  
  This commit implements the creation of the chunk list and the swap file
  usage for non-regular files (i.e. streams). Mappable files are now handled
  like streams, so that needs some work.
  
  The big part that is missing is the use of the chunk list for determining
  whether a sequence of blocks has data and all the handling of writing the
  image data to the output file.
  
  As such: this commit breaks mkimg and makes it useless. It's a good WIP
  to save though -- hence doing it on my branch.

Modified:
  user/marcel/mkimg/image.c

Modified: user/marcel/mkimg/image.c
==============================================================================
--- user/marcel/mkimg/image.c	Sun Sep 28 00:24:01 2014	(r272232)
+++ user/marcel/mkimg/image.c	Sun Sep 28 00:43:04 2014	(r272233)
@@ -27,67 +27,303 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <assert.h>
 #include <errno.h>
 #include <limits.h>
 #include <paths.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 
 #include "image.h"
 #include "mkimg.h"
 
-#define	BUFFER_SIZE	(1024*1024)
+struct chunk {
+	lba_t	ch_block;		/* Block address in image. */
+	off_t	ch_ofs;			/* Offset in backing file. */
+	STAILQ_ENTRY(chunk) ch_list;
+	size_t	ch_size;		/* Size of chunk in bytes. */
+	int	ch_fd;			/* FD of backing file. */
+	u_int	ch_flags;
+#define	CH_FLAGS_GAP		1	/* Chunk is a gap (no FD). */
+#define	CH_FLAGS_DIRTY		2	/* Data modified/only in memory. */
+};
+
+static STAILQ_HEAD(chunk_head, chunk) image_chunks;
+static u_int image_nchunks;
+
+static char image_swap_file[PATH_MAX];
+static int image_swap_fd = -1;
+static u_int image_swap_pgsz;
+static off_t image_swap_size;
 
-static char image_tmpfile[PATH_MAX];
-static int image_fd = -1;
 static lba_t image_size;
 
+/*
+ * Swap file handling.
+ */
+
+static off_t
+image_swap_alloc(size_t size)
+{
+	off_t ofs;
+	size_t unit;
+
+	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+	assert((unit & (unit - 1)) == 0);
+
+	size = (size + unit - 1) & ~(unit - 1);
+
+	ofs = image_swap_size;
+	image_swap_size += size;
+	if (ftruncate(image_swap_fd, image_swap_size) == -1) {
+		image_swap_size = ofs;
+		ofs = -1LL;
+	}
+	fprintf(stderr, "SWAP: off=%jd, size=%zu\n", (intmax_t)ofs, size);
+	return (ofs);
+}
+
+/*
+ * Image chunk handling.
+ */
+
 static void
-cleanup(void)
+image_chunk_dump(void)
 {
+	struct chunk *ch;
 
-	if (image_fd != -1)
-		close(image_fd);
-	unlink(image_tmpfile);
+	fprintf(stderr, "%u chunks:\n", image_nchunks);
+	STAILQ_FOREACH(ch, &image_chunks, ch_list) {
+		fprintf(stderr, "\tblk=%jd, ofs=%jd, fd=%d, sz=%zu, fl=%u\n",
+		    (intmax_t)ch->ch_block, (intmax_t)ch->ch_ofs, ch->ch_fd,
+		    ch->ch_size, ch->ch_flags);
+	}
 }
 
-int
-image_copyin(lba_t blk, int fd, uint64_t *sizep)
+static size_t
+image_chunk_grow(struct chunk *ch, size_t sz)
+{
+	size_t dsz, newsz;
+
+	newsz = ch->ch_size + sz;
+	if (newsz > ch->ch_size) {
+		ch->ch_size = newsz;
+		return (0);
+	}
+	/* We would overflow -- create new chunk for remainder. */
+	dsz = SIZE_MAX - ch->ch_size;
+	assert(dsz < sz);
+	ch->ch_size = SIZE_MAX;
+	return (sz - dsz);
+}
+
+static int
+image_chunk_skipto(lba_t to)
+{
+	struct chunk *ch;
+	lba_t from;
+	size_t sz;
+
+	ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
+	from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
+
+	assert(from <= to);
+
+	/* Nothing to do? */
+	if (from == to)
+		return (0);
+	/* Avoid bugs due to overflows. */
+	if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
+		return (EFBIG);
+	sz = (to - from) * secsz;
+	if (ch != NULL && (ch->ch_flags & CH_FLAGS_GAP)) {
+		sz = image_chunk_grow(ch, sz);
+		if (sz == 0)
+			return (0);
+		from = ch->ch_block + (ch->ch_size / secsz);
+	}
+	ch = malloc(sizeof(*ch));
+	if (ch == NULL)
+		return (ENOMEM);
+	memset(ch, 0, sizeof(*ch));
+	ch->ch_block = from;
+	ch->ch_size = sz;
+	ch->ch_fd = -1;
+	ch->ch_flags |= CH_FLAGS_GAP;
+	STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
+	image_nchunks++;
+	return (0);
+}
+
+static int
+image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
+{
+	struct chunk *ch;
+
+	ch = STAILQ_LAST(&image_chunks, chunk, ch_list);
+	if (ch != NULL && (ch->ch_flags & CH_FLAGS_GAP) == 0) {
+		if (fd == ch->ch_fd &&
+		    blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
+		    ofs == (off_t)(ch->ch_ofs + ch->ch_size)) {
+			sz = image_chunk_grow(ch, sz);
+			if (sz == 0)
+				return (0);
+			blk = ch->ch_block + (ch->ch_size / secsz);
+			ofs = ch->ch_ofs + ch->ch_size;
+		}
+	}
+	ch = malloc(sizeof(*ch));
+	if (ch == NULL)
+		return (ENOMEM);
+	memset(ch, 0, sizeof(*ch));
+	ch->ch_block = blk;
+	ch->ch_ofs = ofs;
+	ch->ch_size = sz;
+	ch->ch_fd = fd;
+	STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
+	image_nchunks++;
+	return (0);
+}
+
+static int
+image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
+{
+	uint64_t *p = buf;
+	size_t n;
+	int error;
+
+	assert(((uintptr_t)p & 3) == 0);
+
+	error = 0;
+	sz = (sz + secsz - 1) & ~(secsz - 1);
+	while (!error && sz > 0) {
+		n = 0;
+		while (n < (secsz >> 3) && p[n] == 0)
+			n++;
+		if (n == (secsz >> 3))
+			error = image_chunk_skipto(blk + 1);
+		else
+			error = image_chunk_append(blk, secsz, ofs, fd);
+		blk++;
+		p += (secsz >> 3);
+		sz -= secsz;
+		ofs += secsz;
+	}
+	return (error);
+}
+
+/*
+ * File mapping support.
+ */
+
+static void *
+image_file_map(int fd, off_t ofs, size_t sz)
+{
+	void *ptr;
+	size_t unit;
+	int flags, prot;
+
+	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+	assert((unit & (unit - 1)) == 0);
+
+	flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
+	/* Allow writing to our swap file only. */
+	prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
+	sz = (sz + unit - 1) & ~(unit - 1);
+	ptr = mmap(NULL, sz, prot, flags, fd, ofs);
+	return ((ptr == MAP_FAILED) ? NULL : ptr);
+}
+
+static int
+image_file_unmap(void *buffer, size_t sz)
+{
+	size_t unit;
+
+	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
+	sz = (sz + unit - 1) & ~(unit - 1);
+	munmap(buffer, sz);
+	return (0);
+}
+
+/*
+ * Input/source file handling.
+ */
+
+static int
+image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
 {
 	char *buffer;
 	uint64_t bytesize;
-	ssize_t bcnt, rdsz;
-	int error, partial;
+	off_t swofs;
+	size_t iosz;
+	ssize_t rdsz;
+	int error;
 
-	assert(BUFFER_SIZE % secsz == 0);
+	/*
+	 * This makes sure we're doing I/O in multiples of the page
+	 * size as well as of the sector size. 2MB is the minimum
+	 * by virtue of secsz at least 512 bytes and the page size
+	 * at least 4K bytes.
+	 */
+	iosz = secsz * image_swap_pgsz;
 
-	buffer = malloc(BUFFER_SIZE);
-	if (buffer == NULL)
-		return (ENOMEM);
 	bytesize = 0;
-	partial = 0;
-	while (1) {
-		rdsz = read(fd, buffer, BUFFER_SIZE);
-		if (rdsz <= 0) {
-			error = (rdsz < 0) ? errno : 0;
-			break;
-		}
-		if (partial)
-			abort();
-		bytesize += rdsz;
-		bcnt = (rdsz + secsz - 1) / secsz;
-		error = image_write(blk, buffer, bcnt);
+	do {
+		swofs = image_swap_alloc(iosz);
+		if (swofs == -1LL)
+			return (errno);
+		buffer = image_file_map(image_swap_fd, swofs, iosz);
+		if (buffer == NULL)
+			return (errno);
+		rdsz = read(fd, buffer, iosz);
+		if (rdsz > 0)
+			error = image_chunk_copyin(blk, buffer, rdsz, swofs,
+			    image_swap_fd);
+		else if (rdsz < 0)
+			error = errno;
+		else
+			error = 0;
+		image_file_unmap(buffer, iosz);
+		/* XXX should we relinquish unused swap space? */
 		if (error)
-			break;
-		blk += bcnt;
-		partial = ((ssize_t)(bcnt * secsz) != rdsz) ? 1 : 0;
-	}
-	free(buffer);
+			return (error);
+
+		bytesize += rdsz;
+		blk += (rdsz + secsz - 1) / secsz;
+	} while (rdsz > 0);
+
 	if (sizep != NULL)
 		*sizep = bytesize;
+	return (0);
+}
+
+static int
+image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
+{
+
+	return (image_copyin_stream(blk, fd, sizep));
+}
+
+int
+image_copyin(lba_t blk, int fd, uint64_t *sizep)
+{
+	struct stat sb;
+	int error;
+
+	error = image_chunk_skipto(blk);
+	if (!error) {
+		if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
+			error = image_copyin_stream(blk, fd, sizep);
+		else
+			error = image_copyin_mapped(blk, fd, sizep);
+	}
 	return (error);
 }
 
@@ -120,23 +356,25 @@ image_copyout_region(int fd, lba_t blk, 
 {
 	char *buffer;
 	off_t ofs;
-	size_t sz;
+	size_t bufsz, sz;
 	ssize_t rdsz, wrsz;
 	int error;
 
+	bufsz = secsz * image_swap_pgsz;
+
 	ofs = lseek(fd, 0L, SEEK_CUR);
 
 	blk *= secsz;
-	if (lseek(image_fd, blk, SEEK_SET) != blk)
+	if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
 		return (errno);
-	buffer = malloc(BUFFER_SIZE);
+	buffer = malloc(bufsz);
 	if (buffer == NULL)
 		return (errno);
 	error = 0;
 	size *= secsz;
 	while (size > 0) {
-		sz = (BUFFER_SIZE < size) ? BUFFER_SIZE : size;
-		rdsz = read(image_fd, buffer, sz);
+		sz = ((ssize_t)bufsz < size) ? bufsz : (size_t)size;
+		rdsz = read(image_swap_fd, buffer, sz);
 		if (rdsz <= 0) {
 			error = (rdsz < 0) ? errno : 0;
 			break;
@@ -161,7 +399,7 @@ image_data(lba_t blk, lba_t size)
 	char *buffer, *p;
 
 	blk *= secsz;
-	if (lseek(image_fd, blk, SEEK_SET) != blk)
+	if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
 		return (1);
 
 	size *= secsz;
@@ -169,7 +407,7 @@ image_data(lba_t blk, lba_t size)
 	if (buffer == NULL)
 		return (1);
 
-	if (read(image_fd, buffer, size) != (ssize_t)size) {
+	if (read(image_swap_fd, buffer, size) != (ssize_t)size) {
 		free(buffer);
 		return (1);
 	}
@@ -185,7 +423,12 @@ image_data(lba_t blk, lba_t size)
 lba_t
 image_get_size(void)
 {
+	static int once = 0;
 
+	if (once == 0) {
+		once++;
+		image_chunk_dump();
+	}
 	return (image_size);
 }
 
@@ -193,8 +436,10 @@ int
 image_set_size(lba_t blk)
 {
 
+	image_chunk_skipto(blk);
+
 	image_size = blk;
-	if (ftruncate(image_fd, blk * secsz) == -1)
+	if (ftruncate(image_swap_fd, blk * secsz) == -1)
 		return (errno);
 	return (0);
 }
@@ -204,27 +449,42 @@ image_write(lba_t blk, void *buf, ssize_
 {
 
 	blk *= secsz;
-	if (lseek(image_fd, blk, SEEK_SET) != blk)
+	if (lseek(image_swap_fd, blk, SEEK_SET) != blk)
 		return (errno);
 	len *= secsz;
-	if (sparse_write(image_fd, buf, len) != len)
+	if (sparse_write(image_swap_fd, buf, len) != len)
 		return (errno);
 	return (0);
 }
 
+static void
+image_cleanup(void)
+{
+
+	if (image_swap_fd != -1)
+		close(image_swap_fd);
+	unlink(image_swap_file);
+}
+
 int
 image_init(void)
 {
 	const char *tmpdir;
 
-	if (atexit(cleanup) == -1)
+	STAILQ_INIT(&image_chunks);
+	image_nchunks = 0;
+
+	image_swap_size = 0;
+	image_swap_pgsz = getpagesize();
+
+	if (atexit(image_cleanup) == -1)
 		return (errno);
 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
 		tmpdir = _PATH_TMP;
-	snprintf(image_tmpfile, sizeof(image_tmpfile), "%s/mkimg-XXXXXX",
+	snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
 	    tmpdir);
-	image_fd = mkstemp(image_tmpfile);
-	if (image_fd == -1)
+	image_swap_fd = mkstemp(image_swap_file);
+	if (image_swap_fd == -1)
 		return (errno);
 	return (0);
 }


More information about the svn-src-user mailing list