kern/60313: data destruction: lseek() misalignment silently
ignored by some block devices
Bruce Evans
bde at zeta.org.au
Tue Dec 16 18:14:30 PST 2003
On Wed, 17 Dec 2003, Matthias Andree wrote:
> >Description:
> da(4) will happily return success on lseek() to some random position that is
> not aligned at a block boundary and start writing from the beginning, byte 0,
> of the block, ignoring the lseek().
>
> In other words, if I have a drive with 512 bytes per block, seek to offset 256
> and write 512 bytes, da(4) will scribble over the first 256 bytes as well.
> Either the seek or the write should be rejected.
The boundary is checked in dscheck() in RELENG_4, but only after
forgetting it mod DEV_BSIZE, so only offsets that are multiples of
DEV_BSIZE but not a multiple of the sector size are detected.
The boundary is forgotten here in physio() (RELENG_4 version):
% blockno = bp->b_offset >> DEV_BSHIFT;
% if (chk_blockno && (daddr_t)blockno != blockno) {
% error = EINVAL; /* blockno overflow */
% goto doerror;
% }
% bp->b_blkno = blockno;
except bp->b_offset is still available so lower layers can check it. None
do. (The acd driver doesn't use dscheck() and uses b_offset to determine
its own idea of the block number; it has a subset of dscheck()'s other
boundary checks and has the same nonexistent boundary checking as physio()
for the offset.)
> vn(4) will accept the seek but a subsequent write(2) will return the bogus
> EINVAL in errno.
I don't see how vn(4) can be missing the bug. In the "labels" case it
uses dscheck() which doesn't check b_offset, and in the non-labels case
it uses its own check which also doesn't check b_offset.
> ad(4) status is unknown.
I don't see how it can be missing the bug.
> FreeBSD 5 status is unknown.
-current is quite different. It deprecates b_blkno and uses b_offset
more (mostly in its bio_offset form), and uses GEOM (except in my
version). physio() still sets b_blkno without checking the boundary,
but b_blkno is mostly not used.
The problem seems to be fixed in GEOM (function g_io_check()).
I fixed it my version of dscheck() but didn't get around
to committing the fix before dscheck() was axed. Here are my current
diffs relative to RELENG_4. They don't apply directly. RELENG_4 needs
mainly the "if (offset % (uoff_t)DEV_BSIZE) {" clause. The patch also
fixes some slightly incorrect choices of types that become fatally
incorrect with larger daddr_t's in -current, and fixes the longstanding
brokenness of EOF handling.
%%%
Index: subr_diskslice.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/Attic/subr_diskslice.c,v
retrieving revision 1.82.2.6
diff -u -2 -r1.82.2.6 subr_diskslice.c
--- subr_diskslice.c 24 Jul 2001 09:49:41 -0000 1.82.2.6
+++ subr_diskslice.c 7 Dec 2003 05:05:22 -0000
@@ -134,47 +284,55 @@
int
dscheck(bp, ssp)
- struct buf *bp;
+ struct bio *bp;
struct diskslices *ssp;
{
- daddr_t blkno;
- u_long endsecno;
- daddr_t labelsect;
+ daddr_t blkno;
+ u_long endsecno;
+ daddr_t labelsect;
struct disklabel *lp;
char *msg;
- long nsec;
+ long nsec;
+ off_t offset;
struct partition *pp;
- daddr_t secno;
- daddr_t slicerel_secno;
+ daddr_t secno;
+ daddr_t slicerel_secno;
struct diskslice *sp;
- int s;
- blkno = bp->b_blkno;
- if (blkno < 0) {
- printf("dscheck(%s): negative b_blkno %ld\n",
- devtoname(bp->b_dev), (long)blkno);
- bp->b_error = EINVAL;
+ offset = bp->bio_offset;
+ if (offset < 0) {
+ printf("dscheck(%s): negative bio_offset %jd\n",
+ devtoname(bp->bio_dev), (intmax_t)offset);
+ bp->bio_error = EINVAL;
+ goto bad;
+ }
+ if (offset % (uoff_t)DEV_BSIZE) {
+ printf(
+ "dscheck(%s): bio_offset %jd is not on a DEV_BSIZE boundary\n",
+ devtoname(bp->bio_dev), (intmax_t)offset);
+ bp->bio_error = EINVAL;
goto bad;
}
- sp = &ssp->dss_slices[dkslice(bp->b_dev)];
+ blkno = btodb(bp->bio_offset);
+ sp = &ssp->dss_slices[dkslice(bp->bio_dev)];
lp = sp->ds_label;
if (ssp->dss_secmult == 1) {
- if (bp->b_bcount % (u_long)DEV_BSIZE)
+ if (bp->bio_bcount % (u_long)DEV_BSIZE)
goto bad_bcount;
secno = blkno;
- nsec = bp->b_bcount >> DEV_BSHIFT;
+ nsec = bp->bio_bcount >> DEV_BSHIFT;
} else if (ssp->dss_secshift != -1) {
- if (bp->b_bcount & (ssp->dss_secsize - 1))
+ if (bp->bio_bcount & (ssp->dss_secsize - 1))
goto bad_bcount;
if (blkno & (ssp->dss_secmult - 1))
- goto bad_blkno;
+ goto bad_offset;
secno = blkno >> ssp->dss_secshift;
- nsec = bp->b_bcount >> (DEV_BSHIFT + ssp->dss_secshift);
+ nsec = bp->bio_bcount >> (DEV_BSHIFT + ssp->dss_secshift);
} else {
- if (bp->b_bcount % ssp->dss_secsize)
+ if (bp->bio_bcount % ssp->dss_secsize)
goto bad_bcount;
if (blkno % ssp->dss_secmult)
- goto bad_blkno;
+ goto bad_offset;
secno = blkno / ssp->dss_secmult;
- nsec = bp->b_bcount / ssp->dss_secsize;
+ nsec = bp->bio_bcount / ssp->dss_secsize;
}
if (lp == NULL) {
@@ -184,6 +342,6 @@
} else {
labelsect = lp->d_partitions[LABEL_PART].p_offset;
-if (labelsect != 0) Debugger("labelsect != 0 in dscheck()");
- pp = &lp->d_partitions[dkpart(bp->b_dev)];
+ KASSERT(labelsect == 0, ("labelsect != 0 in dscheck()"));
+ pp = &lp->d_partitions[dkpart(bp->bio_dev)];
endsecno = pp->p_size;
slicerel_secno = pp->p_offset + secno;
@@ -196,6 +354,6 @@
slicerel_secno + nsec > LABELSECTOR + labelsect &&
#endif
- (bp->b_flags & B_READ) == 0 && sp->ds_wlabel == 0) {
- bp->b_error = EROFS;
+ bp->bio_cmd == BIO_WRITE && sp->ds_wlabel == 0) {
+ bp->bio_error = EROFS;
goto bad;
}
@@ -203,7 +361,7 @@
#if defined(DOSBBSECTOR) && defined(notyet)
/* overwriting master boot record? */
- if (slicerel_secno <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 &&
+ if (slicerel_secno <= DOSBBSECTOR && bp->bio_cmd == BIO_WRITE &&
sp->ds_wlabel == 0) {
- bp->b_error = EROFS;
+ bp->bio_error = EROFS;
goto bad;
}
@@ -211,20 +369,19 @@
/* beyond partition? */
- if (secno + nsec > endsecno) {
- /* if exactly at end of disk, return an EOF */
- if (secno == endsecno) {
- bp->b_resid = bp->b_bcount;
- return (0);
- }
- /* or truncate if part of it fits */
- nsec = endsecno - secno;
- if (nsec <= 0) {
- bp->b_error = EINVAL;
+ if ((uintmax_t)secno + nsec > endsecno) {
+ /* If at or beyond end of partition, return EOF. */
+ if (secno >= endsecno) {
+ if (bp->bio_cmd == BIO_READ) {
+ bp->bio_resid = bp->bio_bcount;
+ return (0);
+ }
+ bp->bio_error = ENOSPC;
goto bad;
}
- bp->b_bcount = nsec * ssp->dss_secsize;
+ /* Truncate to the part that fits. */
+ bp->bio_bcount = (endsecno - secno) * ssp->dss_secsize;
}
- bp->b_pblkno = sp->ds_offset + slicerel_secno;
+ bp->bio_pblkno = sp->ds_offset + slicerel_secno;
/*
@@ -237,18 +394,18 @@
&& slicerel_secno + nsec > LABELSECTOR + labelsect
#endif
+ /* XXX following seems to be broken for dedicated case: */
&& sp->ds_offset != 0) {
struct iodone_chain *ic;
ic = malloc(sizeof *ic , M_DEVBUF, M_WAITOK);
- ic->ic_prev_flags = bp->b_flags;
- ic->ic_prev_iodone = bp->b_iodone;
- ic->ic_prev_iodone_chain = bp->b_iodone_chain;
+ ic->ic_prev_flags = bp->bio_flags;
+ ic->ic_prev_iodone = bp->bio_done;
+ ic->ic_prev_iodone_chain = bp->bio_done_chain;
ic->ic_args[0].ia_long = (LABELSECTOR + labelsect -
slicerel_secno) * ssp->dss_secsize;
ic->ic_args[1].ia_ptr = sp;
- bp->b_flags |= B_CALL;
- bp->b_iodone = dsiodone;
- bp->b_iodone_chain = ic;
- if (!(bp->b_flags & B_READ)) {
+ bp->bio_done = dsiodone;
+ bp->bio_done_chain = ic;
+ if (bp->bio_cmd != BIO_READ) {
/*
* XXX even disklabel(8) writes directly so we need
@@ -260,18 +417,13 @@
* temporarily corrupting the in-core copy.
*/
- if (bp->b_vp != NULL) {
- s = splbio();
- bp->b_vp->v_numoutput++;
- splx(s);
- }
/* XXX need name here. */
msg = fixlabel((char *)NULL, sp,
(struct disklabel *)
- (bp->b_data + ic->ic_args[0].ia_long),
+ (bp->bio_data + ic->ic_args[0].ia_long),
TRUE);
if (msg != NULL) {
printf("dscheck(%s): %s\n",
- devtoname(bp->b_dev), msg);
- bp->b_error = EROFS;
+ devtoname(bp->bio_dev), msg);
+ bp->bio_error = EROFS;
goto bad;
}
@@ -282,19 +434,19 @@
bad_bcount:
printf(
- "dscheck(%s): b_bcount %ld is not on a sector boundary (ssize %d)\n",
- devtoname(bp->b_dev), bp->b_bcount, ssp->dss_secsize);
- bp->b_error = EINVAL;
+ "dscheck(%s): bio_bcount %ld is not on a sector boundary (ssize %d)\n",
+ devtoname(bp->bio_dev), bp->bio_bcount, ssp->dss_secsize);
+ bp->bio_error = EINVAL;
goto bad;
-bad_blkno:
+bad_offset:
printf(
- "dscheck(%s): b_blkno %ld is not on a sector boundary (ssize %d)\n",
- devtoname(bp->b_dev), (long)blkno, ssp->dss_secsize);
- bp->b_error = EINVAL;
+ "dscheck(%s): bio_offset %jd is not on a sector boundary (ssize %d)\n",
+ devtoname(bp->bio_dev), (intmax_t)offset, ssp->dss_secsize);
+ bp->bio_error = EINVAL;
goto bad;
bad:
- bp->b_resid = bp->b_bcount;
- bp->b_flags |= B_ERROR;
+ bp->bio_resid = bp->bio_bcount;
+ bp->bio_flags |= BIO_ERROR;
return (-1);
}
%%%
Bruce
More information about the freebsd-bugs
mailing list