git: 32a4a2680796 - main - Synchronize recoverdisk(1) with the datamuseum.dk version.

Go to: [ bottom of page ] [ top of archives ] [ this month ]
From: Poul-Henning Kamp <phk_at_FreeBSD.org>
Date: Tue, 22 Jul 2025 19:57:23 UTC
The branch main has been updated by phk:

URL: https://cgit.FreeBSD.org/src/commit/?id=32a4a2680796d2ba234e9fd9766ed695ab1a5909

commit 32a4a2680796d2ba234e9fd9766ed695ab1a5909
Author:     Poul-Henning Kamp <phk@FreeBSD.org>
AuthorDate: 2025-07-22 19:54:13 +0000
Commit:     Poul-Henning Kamp <phk@FreeBSD.org>
CommitDate: 2025-07-22 19:54:13 +0000

    Synchronize recoverdisk(1) with the datamuseum.dk version.
---
 sbin/recoverdisk/recoverdisk.1 | 258 +++++++++-----
 sbin/recoverdisk/recoverdisk.c | 766 +++++++++++++++++++++++++++--------------
 2 files changed, 681 insertions(+), 343 deletions(-)

diff --git a/sbin/recoverdisk/recoverdisk.1 b/sbin/recoverdisk/recoverdisk.1
index 2999ac6ec409..9f1deb4c0c23 100644
--- a/sbin/recoverdisk/recoverdisk.1
+++ b/sbin/recoverdisk/recoverdisk.1
@@ -27,7 +27,7 @@
 .Os
 .Sh NAME
 .Nm recoverdisk
-.Nd recover data from hard disk or optical media
+.Nd recover data from disk-like devices
 .Sh SYNOPSIS
 .Nm
 .Op Fl b Ar bigsize
@@ -41,79 +41,101 @@
 .Sh DESCRIPTION
 The
 .Nm
-utility reads data from the
+utility reads all data from the
 .Ar source
-file until all blocks could be successfully read.
+and retries read operations until they succeed.
 If
 .Ar destination
-was specified all data is being written to that file.
-It starts reading in multiples of the sector size.
-Whenever a block fails, it is put to the end of the working queue and will be
-read again, possibly with a smaller read size.
+is specified, all data read will be written there.
 .Pp
-By default it uses block sizes of roughly 1 MB, 32kB, and the native
-sector size (usually 512 bytes).
-These figures are adjusted slightly, for devices whose sectorsize is not a
-power of 2, e.g., audio CDs with a sector size of 2352 bytes.
+The internal work-list can be saved and loaded so that
+.Nm
+sessions can be resumed, for instance when a marginal
+source hard-disk shuts down.
+.Pp
+The work-list is initialized with a single item which covers the entire
+.Ar source 
+and
+.Nm
+always chips away at the first item on the work-list.
+.Pp
+When a read succeeds, that part of the current chunk is eliminated
+from the work-list.
+.Pp
+When a read fails, that part of the item is appended to the work-list
+as a separate item, and will be retried in due order.
+If
+.Ar destination
+is specified, the corresponding range is filled with '_UNREAD_'.
+.Pp
+The first pass attempts to read everything in "big-size" chunks,
+the second pass reads in "medium-size" chunks and third and subsequent
+passes read in "small-size" chunks.  This three stage process is
+an attempt to optimize the case where only a few bad blocks exist
+on
+.Ar source .
+If too many read-errors are encountered,
+.Nm
+will fall back to smaller sizes sooner.
+.Pp
+The three sizes default to 128kB (or less if the sector size does
+not divide 128kB cleanly, for instance audio CD media), and the
+reported
+.Dv DIOCGSTRIPESIZE
+and
+.Dv DIOCGSECTORSIZE
+respectively.
 .Pp
 The options are as follows:
 .Bl -tag -width indent
 .It Fl b Ar bigsize
-The size of reads attempted first.
-The middle pass is roughly the logarithmic average of the bigsize and
-the sectorsize.
-.It Fl r Ar readlist
-Read the list of blocks and block sizes to read from the specified file.
-.It Fl s Ar interval
-How often we should update the writelist file while things go OK.
-The default is 60 and the unit is "progress messages" so if things
-go well, this is the same as once per minute.
+The size of reads attempted in first pass.
+.It Fl m Ar mediumsize
+The size of reads attempted in second pass.
+.It Fl s Ar smallsize
+The size of reads attempted in third and subsequent passes.
+.It Fl r Ar work-list-file
+Read the work-list from a file.
+.It Fl w Ar work-list-file
+Write the work-list to a file when a read succeeds, but at most once
+every minute.
+.It Fl l Ar log-file
+Each successful read is logged with timestamp, offset and length.
+.It Fl t Ar totalsize
+How many bytes should be recovered.  The default is what
+.Dv DIOCGMEDIASIZE
+reports for character and block devices or
+.Dv st_size
+if
+.Ar source
+is a regular file.
+.It Fl p Ar pause
+.Xr sleep 3
+this long whenever a read fails.  This makes the
+.Ar source
+device look less sick to the operating system.
 .It Fl u Ar pattern
-By default blocks which encounter read errors will be filled with
-the pattern
+By default blocks which cannot be read are filled with the pattern
 .Ql _UNREAD_
-in the output file.
-This option can be
-used to specify another pattern.
-Nothing gets written if the string is empty.
+in the output file.  This option can be used to specify a different
+pattern.  If the pattern is the empty string, nothing is written.
 .It Fl v
-Enables nicer status report using ANSI escapes and UTF-8.
-.It Fl w Ar writelist
-Write the list of remaining blocks to read to the specified file if
-.Nm
-is aborted via
-.Dv SIGINT .
+Produce a detailed progress report with ANSI escapes and UTF-8.
 .El
 .Pp
-The
-.Fl r
-and
-.Fl w
-options can be specified together.
-Especially, they can point to the same file, which will be updated on abort.
-.Sh OUTPUT
-The
 .Nm
-utility
-prints several columns, detailing the progress
-.Bl -tag -width remaining
-.It Va start
-Starting offset of the current block.
-.It Va size
-Read size of the current block.
-.It Va len
-Length of the current block.
-.It Va state
-Is increased for every failed read.
-.It Va done
-Number of bytes already read.
-.It Va remaining
-Number of bytes remaining.
-.It Va "% done"
-Percent complete.
-.El
+can be aborted with
+.Dv SIGINT ,
+but with a sick
+.Ar source
+it may take up to several minutes before the current read operation
+returns from the kernel.
+.Pp
 .Sh EXAMPLES
 .Bd -literal
+# check if all sectors can be read on a USB stick:
+recoverdisk /dev/da0
+
 # recover data from failing hard drive ada3
 recoverdisk /dev/ada3 /data/disk.img
 
@@ -129,10 +151,72 @@ recoverdisk -r worklist -w worklist /dev/cd0 /data/cd.iso
 # recover a single file from the unreadable media
 recoverdisk /cdrom/file.avi file.avi
 
-# If the disk hangs the system on read-errors try:
-recoverdisk -b 0 /dev/ada3 /somewhere
-
 .Ed
+.Sh PRACTICAL ADVICE
+In Datamuseum.dk
+.Nm
+has been used to recover all sorts of data-media for two decades,
+here are some things we have learned:
+.Bl -bullet
+.It
+Interacting with failing hardware has a tendency to crash machines,
+so it is always a good idea to use the
+.Fl w Ar work-list-file
+option so that it is possible to continue.
+.It
+When attempting to recover hard to read data from failing hard disks,
+it pays to pamper the drive as much as possible:
+.It
+It is generally best to keep the drive in its usual physical orientation,
+but it can also help to try other orientations.
+.It
+Insulate the drive from external vibrations.
+.It
+Keep the drive cool with a fan.
+.It
+If possible, power the drive from a laboratory power supply.
+.It
+Do not lose patience:  Let
+.Nm
+run as long as possible.
+.It
+(S)ATA controllers do not handle failing disks well, if this
+is a problem, use a USB-(S)ATA adapter instead.
+.It
+The
+.Nm
+source code is deliberately written to be easily portable to
+older versions of 
+.Fx
+and to other operating systems.
+.It
+If you need to read ST-506, RLL or ESDI drives
+.Fx 3.5.1
+is a good compromise.
+.It
+Sometimes forcing the disk to step between reads helps.
+Since
+.Nm
+processes the work-list in the order it is read, this
+can be accomplished by sorting the work-list with
+something like:
+.Dl % sort +0.5
+.It
+By default the
+.Xr cam 4
+layer will retry failing read operations, but that
+will get stuck on the bad sectors for a long time
+and delay recovering what actually can be read from
+a rapidly failing drive.
+In that situation, set the appropriate
+.Dl kern.cam.*.retry_count
+sysctl to zero.
+.It
+For floppies and un-zoned hard disks (ST-506 to
+early IDE) set
+.Fl b Ar bigsize
+to the size of a track.
+.El
 .Sh SEE ALSO
 .Xr dd 1 ,
 .Xr ada 4 ,
@@ -143,7 +227,8 @@ recoverdisk -b 0 /dev/ada3 /somewhere
 The
 .Nm
 utility first appeared in
-.Fx 7.0 .
+.Fx 7.0 
+because Somebody™ forgot to make a backup copy.
 .Sh AUTHORS
 .An -nosplit
 The original implementation was done by
@@ -151,34 +236,29 @@ The original implementation was done by
 with minor improvements from
 .An Ulrich Sp\(:orlein Aq Mt uqs@FreeBSD.org .
 .Pp
-This manual page was written by
+This manual page was originally written by
 .An Ulrich Sp\(:orlein .
 .Sh BUGS
-Reading from media where the sectorsize is not a power of 2 will make all
-1 MB reads fail.
-This is due to the DMA reads being split up into blocks of at most 128kB.
-These reads then fail if the sectorsize is not a divisor of 128kB.
-When reading a full raw audio CD, this leads to roughly 700 error messages
-flying by.
-This is harmless and can be avoided by setting
-.Fl b
-to no more than 128kB.
+If a failing device causes the machine to crash, there is
+a risk that a chunk might have been successfully read
+and removed from the work-list, but not yet flushed to
+the
+.Ar destination .
 .Pp
 .Nm
-needs to know about read errors as fast as possible, i.e., retries by lower
-layers will usually slow down the operation.
-When using
-.Xr cam 4
-attached drives, you may want to set kern.cam.XX.retry_count to zero, e.g.:
-.Bd -literal
-# sysctl kern.cam.ada.retry_count=0
-# sysctl kern.cam.cd.retry_count=0
-# sysctl kern.cam.da.retry_count=0
-.Ed
-.\".Pp
-.\"When reading from optical media, a bug in the GEOM framework will
-.\"prevent it from seeing that the media has been removed.
-.\"The device can still be opened, but all reads will fail.
-.\"This is usually harmless, but will send
-.\".Nm
-.\"into an infinite loop.
+calls
+.Xr fdatasync 3
+on the destination before writing the work-list to a
+temporary file, and calls it again on the temporary
+file before renaming it to the specified
+.Fl w Ar work-list-file
+filename.
+But even then things don't always work out.
+.Pp
+.Nm
+should have an option for reconstructing the work-list
+from the
+.Ar destination
+by enumerating the
+.Fl u Ar pattern
+filled ranges.
diff --git a/sbin/recoverdisk/recoverdisk.c b/sbin/recoverdisk/recoverdisk.c
index 446266c36d50..e1b283e54a93 100644
--- a/sbin/recoverdisk/recoverdisk.c
+++ b/sbin/recoverdisk/recoverdisk.c
@@ -8,6 +8,7 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  */
+
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/disk.h>
@@ -27,18 +28,10 @@
 #include <time.h>
 #include <unistd.h>
 
-/* Safe printf into a fixed-size buffer */
-#define bprintf(buf, fmt, ...)                                          \
-	do {                                                            \
-		int ibprintf;                                           \
-		ibprintf = snprintf(buf, sizeof buf, fmt, __VA_ARGS__); \
-		assert(ibprintf >= 0 && ibprintf < (int)sizeof buf);    \
-	} while (0)
-
 struct lump {
-	off_t			start;
-	off_t			len;
-	int			state;
+	uint64_t		start;
+	uint64_t		len;
+	unsigned		pass;
 	TAILQ_ENTRY(lump)	list;
 };
 
@@ -46,25 +39,32 @@ struct period {
 	time_t			t0;
 	time_t			t1;
 	char			str[20];
-	off_t			bytes_read;
+	uint64_t		bytes_read;
 	TAILQ_ENTRY(period)	list;
 };
 TAILQ_HEAD(period_head, period);
 
 static volatile sig_atomic_t aborting = 0;
 static int verbose = 0;
-static size_t bigsize = 1024 * 1024;
-static size_t medsize;
-static size_t minsize = 512;
-static off_t tot_size;
-static off_t done_size;
+static uint64_t big_read;
+static uint64_t medium_read;
+static uint64_t small_read;
+static uint64_t total_size;
+static uint64_t done_size;
 static char *input;
-static char *wworklist = NULL;
-static char *rworklist = NULL;
+static char *write_worklist_file = NULL;
+static char *read_worklist_file = NULL;
 static const char *unreadable_pattern = "_UNREAD_";
-static const int write_errors_are_fatal = 1;
-static int fdr, fdw;
-
+static int write_errors_are_fatal = 1;
+static int read_fd, write_fd;
+static FILE *log_file = NULL;
+static char *work_buf;
+static char *pattern_buf;
+static double error_pause;
+
+static unsigned nlumps;
+static double n_reads, n_good_reads;
+static time_t t_first;
 static TAILQ_HEAD(, lump) lumps = TAILQ_HEAD_INITIALIZER(lumps);
 static struct period_head minute = TAILQ_HEAD_INITIALIZER(minute);
 static struct period_head quarter = TAILQ_HEAD_INITIALIZER(quarter);
@@ -74,7 +74,8 @@ static struct period_head day = TAILQ_HEAD_INITIALIZER(quarter);
 /**********************************************************************/
 
 static void
-report_good_read2(time_t now, size_t bytes, struct period_head *ph, time_t dt)
+account_good_read_period(time_t now, uint64_t bytes,
+    struct period_head *ph, time_t dt)
 {
 	struct period *pp;
 	const char *fmt;
@@ -82,7 +83,7 @@ report_good_read2(time_t now, size_t bytes, struct period_head *ph, time_t dt)
 
 	pp = TAILQ_FIRST(ph);
 	if (pp == NULL || pp->t1 < now) {
-		pp = calloc(1, sizeof(*pp));
+		pp = calloc(1UL, sizeof(*pp));
 		assert(pp != NULL);
 		pp->t0 = (now / dt) * dt;
 		pp->t1 = (now / dt + 1) * dt;
@@ -98,13 +99,13 @@ report_good_read2(time_t now, size_t bytes, struct period_head *ph, time_t dt)
 }
 
 static void
-report_good_read(time_t now, size_t bytes)
+account_good_read(time_t now, uint64_t bytes)
 {
 
-	report_good_read2(now, bytes, &minute, 60L);
-	report_good_read2(now, bytes, &quarter, 900L);
-	report_good_read2(now, bytes, &hour, 3600L);
-	report_good_read2(now, bytes, &day, 86400L);
+	account_good_read_period(now, bytes, &minute, 60L);
+	account_good_read_period(now, bytes, &quarter, 900L);
+	account_good_read_period(now, bytes, &hour, 3600L);
+	account_good_read_period(now, bytes, &day, 86400L);
 }
 
 static void
@@ -114,20 +115,18 @@ report_one_period(const char *period, struct period_head *ph)
 	int n;
 
 	n = 0;
-	printf("%s \xe2\x94\x82", period);
+	printf("%s ", period);
 	TAILQ_FOREACH(pp, ph, list) {
-		if (n == 3) {
+		if (++n == 4) {
 			TAILQ_REMOVE(ph, pp, list);
 			free(pp);
 			break;
 		}
-		if (n++)
-			printf("  \xe2\x94\x82");
-		printf("  %s %14jd", pp->str, pp->bytes_read);
+		printf("\xe2\x94\x82  %s %14ju  ",
+		    pp->str, (uintmax_t)pp->bytes_read);
 	}
 	for (; n < 3; n++) {
-		printf("  \xe2\x94\x82");
-		printf("  %5s %14s", "", "");
+		printf("\xe2\x94\x82  %5s %14s  ", "", "");
 	}
 	printf("\x1b[K\n");
 }
@@ -146,27 +145,23 @@ report_periods(void)
 static void
 set_verbose(void)
 {
-	struct winsize wsz;
 
-	if (!isatty(STDIN_FILENO) || ioctl(STDIN_FILENO, TIOCGWINSZ, &wsz))
-		return;
 	verbose = 1;
 }
 
 static void
-report_header(int eol)
+report_header(const char *term)
 {
-	printf("%13s %7s %13s %5s %13s %13s %9s",
+	printf("%13s %7s %13s %5s %13s %13s %9s%s",
 	    "start",
 	    "size",
 	    "block-len",
 	    "pass",
 	    "done",
 	    "remaining",
-	    "% done");
-	if (eol)
-		printf("\x1b[K");
-	putchar('\n');
+	    "% done",
+	    term
+	);
 }
 
 #define REPORTWID 79
@@ -186,20 +181,20 @@ report_hline(const char *how)
 	printf("\x1b[K\n");
 }
 
-static off_t hist[REPORTWID];
-static off_t last_done = -1;
+static uint64_t hist[REPORTWID];
+static uint64_t prev_done = ~0UL;
 
 static void
-report_histogram(const struct lump *lp)
+report_histogram(uint64_t start)
 {
-	off_t j, bucket, fp, fe, k, now;
+	uint64_t j, bucket, fp, fe, k, now;
 	double a;
 	struct lump *lp2;
 
-	bucket = tot_size / REPORTWID;
-	if (tot_size > bucket * REPORTWID)
+	bucket = total_size / REPORTWID;
+	if (total_size > bucket * REPORTWID)
 		bucket += 1;
-	if (done_size != last_done) {
+	if (done_size != prev_done) {
 		memset(hist, 0, sizeof hist);
 		TAILQ_FOREACH(lp2, &lumps, list) {
 			fp = lp2->start;
@@ -213,9 +208,9 @@ report_histogram(const struct lump *lp)
 				fp += k;
 			}
 		}
-		last_done = done_size;
+		prev_done = done_size;
 	}
-	now = lp->start / bucket;
+	now = start / bucket;
 	for (j = 0; j < REPORTWID; j++) {
 		a = round(8 * (double)hist[j] / bucket);
 		assert (a >= 0 && a < 9);
@@ -228,7 +223,7 @@ report_histogram(const struct lump *lp)
 		} else {
 			putchar(0xe2);
 			putchar(0x96);
-			putchar(0x80 + (int)a);
+			putchar(0x80 + (char)a);
 		}
 		if (j == now)
 			printf("\x1b[0m");
@@ -237,34 +232,40 @@ report_histogram(const struct lump *lp)
 }
 
 static void
-report(const struct lump *lp, size_t sz)
+report(uint64_t sz)
 {
 	struct winsize wsz;
+	const struct lump *lp = TAILQ_FIRST(&lumps);
 	int j;
-
-	assert(lp != NULL);
+	unsigned pass = 0;
+	uintmax_t start = 0, length = 0;
+	time_t t_now = time(NULL);
+
+	if (lp != NULL) {
+		pass = lp->pass;
+		start = lp->start;
+		length = lp->len;
+	}
 
 	if (verbose) {
 		printf("\x1b[H%s\x1b[K\n", input);
-		report_header(1);
-	} else {
-		putchar('\r');
+		report_header("\x1b[K\n");
 	}
 
-	printf("%13jd %7zu %13jd %5d %13jd %13jd %9.4f",
-	    (intmax_t)lp->start,
-	    sz,
-	    (intmax_t)lp->len,
-	    lp->state,
-	    (intmax_t)done_size,
-	    (intmax_t)(tot_size - done_size),
-	    100*(double)done_size/(double)tot_size
+	printf("%13ju %7ju %13ju %5u %13ju %13ju %9.4f",
+	    start,
+	    (uintmax_t)sz,
+	    length,
+	    pass,
+	    (uintmax_t)done_size,
+	    (uintmax_t)(total_size - done_size),
+	    100*(double)done_size/(double)total_size
 	);
 
 	if (verbose) {
 		printf("\x1b[K\n");
 		report_hline(NULL);
-		report_histogram(lp);
+		report_histogram(start);
 		if (TAILQ_EMPTY(&minute)) {
 			report_hline(NULL);
 		} else {
@@ -272,27 +273,36 @@ report(const struct lump *lp, size_t sz)
 			report_periods();
 			report_hline("\xe2\x94\xb4");
 		}
+		printf("Missing: %u", nlumps);
+		printf("  Success: %.0f/%.0f =", n_good_reads, n_reads);
+		printf(" %.4f%%", 100 * n_good_reads / n_reads);
+		printf("  Duration: %.3fs", (t_now - t_first) / n_reads);
+		printf("\x1b[K\n");
+		report_hline(NULL);
 		j = ioctl(STDIN_FILENO, TIOCGWINSZ, &wsz);
 		if (!j)
 			printf("\x1b[%d;1H", wsz.ws_row);
+	} else {
+		printf("\n");
 	}
-	fflush(stdout);
 }
 
 /**********************************************************************/
 
 static void
-new_lump(off_t start, off_t len, int state)
+new_lump(uint64_t start, uint64_t len, unsigned pass)
 {
 	struct lump *lp;
 
+	assert(len > 0);
 	lp = malloc(sizeof *lp);
 	if (lp == NULL)
 		err(1, "Malloc failed");
 	lp->start = start;
 	lp->len = len;
-	lp->state = state;
+	lp->pass = pass;
 	TAILQ_INSERT_TAIL(&lumps, lp, list);
+	nlumps += 1;
 }
 
 /**********************************************************************
@@ -306,98 +316,100 @@ save_worklist(void)
 	struct lump *llp;
 	char buf[PATH_MAX];
 
-	if (fdw >= 0 && fdatasync(fdw))
+	if (write_fd >= 0 && fdatasync(write_fd))
 		err(1, "Write error, probably disk full");
 
-	if (wworklist != NULL) {
-		bprintf(buf, "%s.tmp", wworklist);
-		(void)fprintf(stderr, "\nSaving worklist ...");
-		(void)fflush(stderr);
+	if (write_worklist_file != NULL) {
+		snprintf(buf, sizeof(buf), "%s.tmp", write_worklist_file);
+		fprintf(stderr, "\nSaving worklist ...");
 
 		file = fopen(buf, "w");
 		if (file == NULL)
 			err(1, "Error opening file %s", buf);
 
-		TAILQ_FOREACH(llp, &lumps, list)
-			fprintf(file, "%jd %jd %d\n",
-			    (intmax_t)llp->start, (intmax_t)llp->len,
-			    llp->state);
-		(void)fflush(file);
+		TAILQ_FOREACH(llp, &lumps, list) {
+			assert (llp->len > 0);
+			fprintf(file, "%ju %ju %u\n",
+			    (uintmax_t)llp->start,
+			    (uintmax_t)llp->len,
+			    llp->pass);
+		}
+		fflush(file);
 		if (ferror(file) || fdatasync(fileno(file)) || fclose(file))
 			err(1, "Error writing file %s", buf);
-		if (rename(buf, wworklist))
-			err(1, "Error renaming %s to %s", buf, wworklist);
-		(void)fprintf(stderr, " done.\n");
+		if (rename(buf, write_worklist_file))
+			err(1, "Error renaming %s to %s",
+			    buf, write_worklist_file);
+		fprintf(stderr, " done.\n");
 	}
 }
 
 /* Read the worklist if -r was given */
-static off_t
-read_worklist(off_t t)
+static uint64_t
+read_worklist(void)
 {
-	off_t s, l, d;
-	int state, lines;
+	uintmax_t start, length;
+	uint64_t missing = 0;
+	unsigned pass, lines;
 	FILE *file;
 
-	(void)fprintf(stderr, "Reading worklist ...");
-	(void)fflush(stderr);
-	file = fopen(rworklist, "r");
+	fprintf(stderr, "Reading worklist ...");
+	file = fopen(read_worklist_file, "r");
 	if (file == NULL)
-		err(1, "Error opening file %s", rworklist);
+		err(1, "Error opening file %s", read_worklist_file);
 
 	lines = 0;
-	d = t;
 	for (;;) {
 		++lines;
-		if (3 != fscanf(file, "%jd %jd %d\n", &s, &l, &state)) {
+		if (3 != fscanf(file, "%ju %ju %u\n", &start, &length, &pass)) {
 			if (!feof(file))
-				err(1, "Error parsing file %s at line %d",
-				    rworklist, lines);
+				err(1, "Error parsing file %s at line %u",
+				    read_worklist_file, lines);
 			else
 				break;
 		}
-		new_lump(s, l, state);
-		d -= l;
+		if (length > 0) {
+			new_lump(start, length, pass);
+			missing += length;
+		}
 	}
 	if (fclose(file))
-		err(1, "Error closing file %s", rworklist);
-	(void)fprintf(stderr, " done.\n");
+		err(1, "Error closing file %s", read_worklist_file);
+	fprintf(stderr, " done.\n");
 	/*
-	 * Return the number of bytes already read
-	 * (at least not in worklist).
+	 * Return the number of bytes outstanding
 	 */
-	return (d);
+	return (missing);
 }
 
 /**********************************************************************/
 
 static void
-write_buf(int fd, const void *buf, ssize_t len, off_t where)
+write_buf(int fd, const void *buf, uint64_t length, uint64_t where)
 {
-	ssize_t i;
+	int64_t i;
 
-	i = pwrite(fd, buf, len, where);
-	if (i == len)
+	i = pwrite(fd, buf, length, (off_t)where);
+	if (i > 0 && (uint64_t)i == length)
 		return;
 
-	printf("\nWrite error at %jd/%zu\n\t%s\n",
-	    where, i, strerror(errno));
+	printf("\nWrite error at %ju/%ju: %jd (%s)\n",
+	    (uintmax_t)where,
+	    (uintmax_t)length,
+	    (intmax_t)i, strerror(errno));
 	save_worklist();
 	if (write_errors_are_fatal)
 		exit(3);
 }
 
 static void
-fill_buf(char *buf, ssize_t len, const char *pattern)
+fill_buf(char *buf, int64_t len, const char *pattern)
 {
-	ssize_t sz = strlen(pattern);
-	ssize_t i, j;
+	int64_t sz = strlen(pattern);
+	int64_t i;
 
 	for (i = 0; i < len; i += sz) {
-		j = len - i;
-		if (j > sz)
-			j = sz;
-		memcpy(buf + i, pattern, j);
+		memcpy(buf + i, pattern, MIN(len - i, sz));
 	}
 }
 
@@ -406,45 +418,334 @@ fill_buf(char *buf, ssize_t len, const char *pattern)
 static void
 usage(void)
 {
-	(void)fprintf(stderr, "usage: recoverdisk [-b bigsize] [-r readlist] "
+	fprintf(stderr, "usage: recoverdisk [-b big_read] [-r readlist] "
 	    "[-s interval] [-w writelist] source [destination]\n");
 	/* XXX update */
 	exit(1);
 }
 
 static void
-sighandler(__unused int sig)
+sighandler(int sig)
 {
 
+	(void)sig;
 	aborting = 1;
 }
 
+/**********************************************************************/
+/* One read at the head of the work-list: returns bytes read, -(bytes failed), or 0 if the list is empty. */
+static int64_t
+attempt_one_lump(time_t t_now)
+{
+	struct lump *lp;
+	uint64_t sz;
+	int64_t retval;
+	int error;
+
+	lp = TAILQ_FIRST(&lumps);
+	if (lp == NULL)	/* work-list is empty: nothing to attempt */
+		return(0);
+
+	if (lp->pass == 0) {	/* read size is picked by the lump's pass, capped at its length */
+		sz = MIN(lp->len, big_read);
+	} else if (lp->pass == 1) {
+		sz = MIN(lp->len, medium_read);
+	} else {
+		sz = MIN(lp->len, small_read);
+	}
+
+	assert(sz != 0);	/* needs a non-empty lump and a non-zero read size */
+
+	n_reads += 1;
+	retval = pread(read_fd, work_buf, sz, lp->start);
+
+#if 0 /* enable this when testing: fakes EIO when the low 4 bits of random() are zero */
+	if (!(random() & 0xf)) {
+		retval = -1;
+		errno = EIO;
+		usleep(20000);
+	} else {
+		usleep(2000);
+	}
+#endif
+
+	error = errno;	/* captured right after the read; only meaningful if it failed */
+	if (retval > 0) {
+		n_good_reads += 1;
+		sz = retval;	/* a short read only accounts for what was actually returned */
+		done_size += sz;
+		if (write_fd >= 0) {
+			write_buf(write_fd, work_buf, sz, lp->start);
+		}
+		if (log_file != NULL) {
+			fprintf(log_file, "%jd %ju %ju\n",
+			    (intmax_t)t_now,
+			    (uintmax_t)lp->start,
+			    (uintmax_t)sz
+			);
+			fflush(log_file);
+		}
+	} else {	/* failure path; NB: a 0-byte pread() result also lands here */
+		printf("%14ju %7ju read error %d: (%s)",
+		    (uintmax_t)lp->start,
+		    (uintmax_t)sz, error, strerror(error));
+		if (error_pause > 1) {	/* only announces the pause; no sleep happens in this function */
+			printf(" (Pausing %g s)", error_pause);
+		}
+		printf("\n");
+
+		if (write_fd >= 0 && pattern_buf != NULL) {	/* presumably pattern_buf holds the -u pattern -- confirm at its setup */
+			write_buf(write_fd, pattern_buf, sz, lp->start);
+		}
+		new_lump(lp->start, sz, lp->pass + 1);	/* requeue the failed range at the tail, one pass later */
+		retval = -sz;	/* negative return value = number of bytes that failed */
+	}
+	lp->start += sz;	/* success or failure, this range leaves the head lump */
+	lp->len -= sz;
+	if (lp->len == 0) {
+		TAILQ_REMOVE(&lumps, lp, list);
+		nlumps -= 1;
+		free(lp);
+	}
+	errno = error;	/* restore the value captured after pread() for the caller */
+	return (retval);
+}
+
+
+/**********************************************************************/
+/* Fill in total_size (unless already non-zero) from DIOCGMEDIASIZE or st_size; exits if no size can be found. */
+static void
+determine_total_size(void)
+{
+	struct stat sb;
+	int error;
+
+	if (total_size != 0)	/* already set (presumably by the -t option -- confirm in main) */
+		return;
+
+	error = fstat(read_fd, &sb);
+	if (error < 0)
+		err(1, "fstat failed");
+
+	if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode)) {
+#ifdef DIOCGMEDIASIZE
+		off_t mediasize;
+		error = ioctl(read_fd, DIOCGMEDIASIZE, &mediasize);
+		if (error == 0 && mediasize > 0) {	/* accept only a successful, positive answer */
+			total_size = mediasize;
+			printf("# Got total_size from DIOCGMEDIASIZE: %ju\n",
+			    (uintmax_t)total_size);
+			return;
+		}
+#endif
+	} else if (S_ISREG(sb.st_mode) && sb.st_size > 0) {
+		total_size = sb.st_size;
+		printf("# Got total_size from stat(2): %ju\n",
+		    (uintmax_t)total_size);
+		return;
+	} else {	/* NB: a regular file with st_size == 0 also ends up in this branch */
+		errx(1, "Input must be device or regular file");
+	}
+	fprintf(stderr, "Specify total size with -t option\n");	/* reached only for a device whose size was not obtained */
+	exit(1);
+}
+
+static void
+determine_read_sizes(void)
+{
+	int error;
+	u_int sectorsize;
+	off_t stripesize;
+
+	determine_total_size();
+
+#ifdef DIOCGSECTORSIZE
+	if (small_read == 0) {
+		error = ioctl(read_fd, DIOCGSECTORSIZE, &sectorsize);
+		if (error >= 0 && sectorsize > 0) {
+			small_read = sectorsize;
+			printf("# Got small_read from DIOCGSECTORSIZE: %ju\n",
+			    (uintmax_t)small_read
+			);
+		}
+	}
+#endif
+
+	if (small_read == 0) {
+		printf("Assuming 512 for small_read\n");
+		small_read = 512;
+	}
+
+	if (medium_read && (medium_read % small_read)) {
+		errx(1,
+		    "medium_read (%ju) is not a multiple of small_read (%ju)\n",
+		    (uintmax_t)medium_read, (uintmax_t)small_read
+		);
+	}
+
+	if (big_read != 0 && (big_read % small_read)) {
+		errx(1,
+		    "big_read (%ju) is not a multiple of small_read (%ju)\n",
+		    (uintmax_t)big_read, (uintmax_t)small_read
+		);
+	}
+
+#ifdef DIOCGSTRIPESIZE
+	if (medium_read == 0) {
+		error = ioctl(read_fd, DIOCGSTRIPESIZE, &stripesize);
+		if (error < 0 || stripesize < 0) {
+			// nope
+		} else if ((uint64_t)stripesize < small_read) {
+			// nope
+		} else if (stripesize % small_read) {
+			// nope
+		} else if (0 < stripesize && stripesize < (128<<10)) {
+			medium_read = stripesize;
*** 398 LINES SKIPPED ***