PERFORCE change 125117 for review

Ulf Lilleengen lulf at FreeBSD.org
Mon Aug 13 12:46:31 PDT 2007


http://perforce.freebsd.org/chv.cgi?CH=125117

Change 125117 by lulf at lulf_carrot on 2007/08/13 19:45:30

	- Improve the gv_drive_is_newer hack by adding the drive to check as a
	  parameter. There was a case where gvinum didn't have the actual drive
	  first in the list, which ended in comparing the wrong timestamps.
	- Re-add the growable state of a plex.
	- Fix a bug where GV_SD_CANGOUP was set instead of checked.
	- Make raid5 growing depend on the raid-5 plex not being degraded. I've
	  added some awareness of the fact that a subdisk could be added to a
	  degraded raid-5 plex, but I don't allow it for now, since it also
	  requires a rewrite of how degraded writes and reads are done. However,
	  the idea is that all of gvinum should be aware that they can be in a
	  growing phase.
	- Make sure the plex doesn't get the grown size until after the grow.
	  This prevents writes outside the actual plex size.
	- Use gv_start_plex in gv_start_vol since it basically does the same.

Affected files ...

.. //depot/projects/soc2007/lulf/gvinum_fixup/sbin/gvinum/gvinum.c#20 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.c#35 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#28 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_create.c#10 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_events.c#14 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#24 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_list.c#5 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#26 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_raid5.c#14 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_share.c#6 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_state.c#23 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_subr.c#29 edit
.. //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_var.h#25 edit

Differences ...

==== //depot/projects/soc2007/lulf/gvinum_fixup/sbin/gvinum/gvinum.c#20 (text+ko) ====


==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.c#35 (text+ko) ====


==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum.h#28 (text+ko) ====

@@ -93,9 +93,10 @@
 int	gv_consumer_is_open(struct g_consumer *);
 int	gv_provider_is_open(struct g_provider *);
 int	gv_object_type(struct gv_softc *, char *);
-void	gv_parse_config(struct gv_softc *, char *);
+void	gv_parse_config(struct gv_softc *, char *, struct gv_drive *);
 int	gv_sd_to_drive(struct gv_sd *, struct gv_drive *);
 int	gv_sd_to_plex(struct gv_sd *, struct gv_plex *);
+int	gv_sdcount(struct gv_plex *, int);
 void	gv_update_plex_config(struct gv_plex *);
 void	gv_update_vol_size(struct gv_volume *, off_t);
 off_t	gv_vol_size(struct gv_volume *);

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_create.c#10 (text+ko) ====

@@ -253,6 +253,13 @@
 		return (GV_ERR_CREATE);
 	}
 
+	if (p->org == GV_PLEX_RAID5 && p->state == GV_PLEX_DEGRADED) {
+		printf("VINUM: can't add subdisk to %s, rebuild plex before "
+		    " adding subdisks\n", p->name);
+		g_free(s);
+		return (0);
+	}
+
 	/*
 	 * First we give the subdisk to the drive, to handle autosized
 	 * values ...

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_events.c#14 (text+ko) ====

@@ -126,7 +126,7 @@
 		g_free(hdr);
 		goto failed;
 	}
-	gv_parse_config(sc, buf);
+	gv_parse_config(sc, buf, d);
 	g_free(buf);
 
 	g_topology_lock();
@@ -213,4 +213,5 @@
 		LIST_INSERT_HEAD(&sc->drives, d, drive);
 	else
 		LIST_INSERT_AFTER(d2, d, drive);
+	gv_save_config(sc);
 }

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_init.c#24 (text+ko) ====

@@ -104,14 +104,14 @@
 
 	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
 
-	if (p->state == GV_PLEX_UP)
-		return (0);
+/*	if (p->state == GV_PLEX_UP)
+		return (0);*/
 
 	error = 0;
 	v = p->vol_sc;
-	if ((v != NULL) && (v->plexcount > 1))
-		error = gv_sync(v);
-	else if (p->org == GV_PLEX_STRIPED) {
+/*	if ((v != NULL) && (v->plexcount > 1))
+		error = gv_sync(v);*/
+	if (p->org == GV_PLEX_STRIPED) {
 		grow = 0;
 		LIST_FOREACH(s, &p->subdisks, in_plex) {
 			if (s->flags & GV_SD_GROW) {
@@ -122,18 +122,15 @@
 		if (grow)
 			error = gv_grow_plex(p);
 	} else if (p->org == GV_PLEX_RAID5) {
-		if (p->state == GV_PLEX_DEGRADED) {
-			rebuild = 0;
+		if (p->state > GV_PLEX_DEGRADED) {
 			LIST_FOREACH(s, &p->subdisks, in_plex) {
-				if (s->state < GV_SD_UP) {
-					rebuild = 1;
-					break;
+				if (s->flags & GV_SD_GROW) {
+					error = gv_grow_plex(p);
+					return (error);
 				}
 			}
-			if (rebuild)
-				error = gv_rebuild_plex(p);
-			else
-				error = gv_grow_plex(p);
+		} else if (p->state == GV_PLEX_DEGRADED) {
+			error = gv_rebuild_plex(p);
 		} else
 			error = gv_init_plex(p);
 	}
@@ -158,23 +155,7 @@
 	else if (v->plexcount == 1) {
 		p = LIST_FIRST(&v->plexes);
 		KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
-		if (p->org == GV_PLEX_RAID5) {
-			switch (p->state) {
-			case GV_PLEX_DOWN:
-				error = gv_init_plex(p);
-				break;
-			case GV_PLEX_DEGRADED:
-				error = gv_rebuild_plex(p);
-				break;
-			default:
-				return (0);
-			}
-		} else {
-			LIST_FOREACH(s, &p->subdisks, in_plex) {
-				gv_set_sd_state(s, GV_SD_UP,
-				    GV_SETSTATE_CONFIG);
-			}
-		}
+		error = gv_start_plex(p);
 	} else
 		error = gv_sync(v);
 
@@ -239,6 +220,8 @@
 static int
 gv_rebuild_plex(struct gv_plex *p)
 {
+	struct gv_drive *d;
+	struct gv_sd *s;
 
 /* XXX: Is this safe? (Allows for mounted rebuild)*/
 /*	if (gv_provider_is_open(p->vol_sc->provider))
@@ -248,6 +231,18 @@
 	    p->flags & GV_PLEX_REBUILDING ||
 	    p->flags & GV_PLEX_GROWING)
 		return (EINPROGRESS);
+	/*
+	 * Make sure that all subdisks have consumers. We won't allow a rebuild
+	 * unless every subdisk have one.
+	 */
+	LIST_FOREACH(s, &p->subdisks, in_plex) {
+		d = s->drive_sc;
+		if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) {
+			printf("VINUM: can't rebuild %s, subdisk(s) have no "
+			    "drives\n", p->name);
+			return (ENXIO);
+		}
+	}
 	p->flags |= GV_PLEX_REBUILDING;
 	p->synced = 0;
 

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_list.c#5 (text+ko) ====

@@ -300,7 +300,7 @@
 			    (intmax_t)p->synced,
 			    (int)((p->synced * 100) / p->size));
 		}
-		printf("\t\tOrganization: %s", gv_plexorg(p->org));
+		sbuf_printf(sb, "\t\tOrganization: %s", gv_plexorg(p->org));
 		if (gv_is_striped(p)) {
 			sbuf_printf(sb, "\tStripe size: %s\n",
 			    gv_roughlength(p->stripesize, 1));

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_plex.c#26 (text+ko) ====

@@ -161,15 +161,9 @@
 		KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
 
 		/* Take growing subdisks into account when calculating. */
-		sdcount = p->sdcount;
-		if (boff >= p->synced) {
-			LIST_FOREACH(s, &p->subdisks, in_plex) {
-				if (s->flags & GV_SD_GROW)
-					sdcount--;
-			}
-		} else if (!(boff + bcount <= p->synced)){
+		sdcount = gv_sdcount(p, (boff >= p->synced));
+		if (!(boff + bcount <= p->synced))
 			return (GV_ERR_ISBUSY);
-		}
 		/* The number of the subdisk where the stripe resides. */
 		*sdno = stripeno % sdcount;
 
@@ -712,11 +706,7 @@
 			g_free(bp->bio_data);
 
 		/* Find the real size of the plex. */
-		sdcount = p->sdcount;
-		LIST_FOREACH(s, &p->subdisks, in_plex) {
-			if (s->flags & GV_SD_GROW)
-				sdcount--;
-		}
+		sdcount = gv_sdcount(p, 1);
 		s = LIST_FIRST(&p->subdisks);
 		/* XXX: should not ever happen */
 		if (s == NULL) {
@@ -731,6 +721,7 @@
 				s->flags &= ~GV_SD_GROW;
 				gv_set_sd_state(s, GV_SD_UP, 0);
 			}
+			p->size = gv_plex_size(p);
 			gv_set_plex_state(p, GV_PLEX_UP, 0);
 			g_topology_lock();
 			gv_access(v->provider, -1, -1, 0);
@@ -974,7 +965,7 @@
 {
 	struct gv_sd *s;
 	int error, flags;
-	off_t offset;
+	off_t offset, plexsize;
 
 	error = bp->bio_error;
 	flags = bp->bio_cflags;
@@ -1000,7 +991,7 @@
 		return;
 	}
 
-	offset += (p->stripesize * (p->sdcount - 1));
+	offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
 	if (offset >= p->size) {
 		/* We're finished. */
 		printf("VINUM: rebuild of %s finished\n", p->name);

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_raid5.c#14 (text+ko) ====

@@ -165,7 +165,7 @@
 	if (p == NULL || LIST_EMPTY(&p->subdisks))
 		return (ENXIO);
 
-	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 0);
+	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
 
 	/* Find the right subdisk. */
 	parity = NULL;
@@ -239,7 +239,7 @@
 	if (p == NULL || LIST_EMPTY(&p->subdisks))
 		return (ENXIO);
 
-	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 0);
+	gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
 
 	/* Find the right subdisk. */
 	broken = NULL;
@@ -553,7 +553,7 @@
 	off_t len_left, stripeend, stripeoff, stripestart;
 
 	sdcount = p->sdcount;
-		if (growing) {
+	if (growing) {
 		LIST_FOREACH(s, &p->subdisks, in_plex) {
 			if (s->flags & GV_SD_GROW)
 				sdcount--;

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_share.c#6 (text+ko) ====

@@ -270,6 +270,8 @@
 		return (GV_PLEX_INITIALIZING);
 	else if (!strcmp(buf, "degraded"))
 		return (GV_PLEX_DEGRADED);
+	else if (!strcmp(buf, "growable"))
+		return (GV_PLEX_GROWABLE);
 	else
 		return (GV_PLEX_DOWN);
 }
@@ -285,6 +287,8 @@
 		return "initializing";
 	case GV_PLEX_DEGRADED:
 		return "degraded";
+	case GV_PLEX_GROWABLE:
+		return "growable";
 	case GV_PLEX_UP:
 		return "up";
 	default:

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_state.c#23 (text+ko) ====

@@ -220,7 +220,7 @@
 
 			if (p->org != GV_PLEX_RAID5)
 				break;
-			else if (s->flags |= GV_SD_CANGOUP) {
+			else if (s->flags & GV_SD_CANGOUP) {
 				s->flags &= ~GV_SD_CANGOUP;
 				break;
 			} else if (flags & GV_SETSTATE_FORCE)
@@ -412,6 +412,7 @@
 void
 gv_update_plex_state(struct gv_plex *p)
 {
+	struct gv_sd *s;
 	int sdstates;
 	int oldstate;
 
@@ -425,6 +426,7 @@
 	/* If all subdisks are up, our plex can be up, too. */
 	if (sdstates == GV_SD_UPSTATE)
 		p->state = GV_PLEX_UP;
+
 	/* One or more of our subdisks are down. */
 	else if (sdstates & GV_SD_DOWNSTATE) {
 		/* A RAID5 plex can handle one dead subdisk. */
@@ -435,15 +437,24 @@
 
 	/* Some of our subdisks are initializing. */
 	} else if (sdstates & GV_SD_INITSTATE) {
+
 		if (p->flags & GV_PLEX_SYNCING ||
-		    p->flags & GV_PLEX_REBUILDING ||
-		    p->flags & GV_PLEX_GROWING)
+		    p->flags & GV_PLEX_REBUILDING)
 			p->state = GV_PLEX_DEGRADED;
 		else
 			p->state = GV_PLEX_DOWN;
 	} else
 		p->state = GV_PLEX_DOWN;
 
+	if (p->state == GV_PLEX_UP) {
+		LIST_FOREACH(s, &p->subdisks, in_plex) {
+			if (s->flags & GV_SD_GROW) {
+				p->state = GV_PLEX_GROWABLE;
+				break;
+			}
+		}
+	}
+
 	if (p->state != oldstate)
 		printf("VINUM: plex %s state change: %s -> %s\n", p->name,
 		    gv_plexstate(oldstate), gv_plexstate(p->state));

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_subr.c#29 (text+ko) ====

@@ -51,11 +51,11 @@
 #include <geom/vinum/geom_vinum.h>
 #include <geom/vinum/geom_vinum_share.h>
 
-int	gv_drive_is_newer(struct gv_softc *);
+int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
 static off_t gv_plex_smallest_sd(struct gv_plex *);
 
 void
-gv_parse_config(struct gv_softc *sc, char *buf)
+gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
 {
 	char *aptr, *bptr, *cptr;
 	struct gv_volume *v, *v2;
@@ -64,7 +64,7 @@
 	int error, is_newer, tokens;
 	char *token[GV_MAXARGS];
 
-	is_newer = gv_drive_is_newer(sc);
+	is_newer = gv_drive_is_newer(sc, d);
 
 	/* Until the end of the string *buf. */
 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
@@ -377,9 +377,9 @@
 	} else {
 		if ((p->org == GV_PLEX_RAID5 ||
 		    p->org == GV_PLEX_STRIPED) &&
-		    !(p->flags & GV_PLEX_NEWBORN)) {
+		    !(p->flags & GV_PLEX_NEWBORN) && 
+		    p->state >= GV_PLEX_DEGRADED) {
 			s->flags |= GV_SD_GROW;
-			s->state = GV_SD_UP;
 		}
 		p->sdcount++;
 	}
@@ -397,12 +397,31 @@
 	v->size = size;
 }
 
+/* Return how many subdisks that constitute the original plex. */
+int
+gv_sdcount(struct gv_plex *p, int growing)
+{
+	struct gv_sd *s;
+	int sdcount;
+
+	sdcount = p->sdcount;
+	if (growing) {
+		LIST_FOREACH(s, &p->subdisks, in_plex) {
+			if (s->flags & GV_SD_GROW)
+				sdcount--;
+		}
+	}
+
+	return (sdcount);
+}
+
 /* Calculates the plex size. */
 off_t
 gv_plex_size(struct gv_plex *p)
 {
 	struct gv_sd *s;
 	off_t size;
+	int sdcount;
 
 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
 
@@ -411,6 +430,7 @@
 
 	/* Adjust the size of our plex. */
 	size = 0;
+	sdcount = gv_sdcount(p, 1);
 	switch (p->org) {
 	case GV_PLEX_CONCAT:
 		LIST_FOREACH(s, &p->subdisks, in_plex)
@@ -418,11 +438,11 @@
 		break;
 	case GV_PLEX_STRIPED:
 		s = LIST_FIRST(&p->subdisks);
-		size = p->sdcount * s->size;
+		size = sdcount * s->size;
 		break;
 	case GV_PLEX_RAID5:
 		s = LIST_FIRST(&p->subdisks);
-		size = (p->sdcount - 1) * s->size;
+		size = (sdcount - 1) * s->size;
 		break;
 	}
 
@@ -521,11 +541,10 @@
 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
 		p->flags &= ~GV_PLEX_ADDED;
 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
-	} else {
+	} else if (p->state == GV_PLEX_UP) {
 		LIST_FOREACH(s, &p->subdisks, in_plex) {
 			if (s->flags & GV_SD_GROW) {
-				gv_set_plex_state(p, GV_PLEX_DEGRADED,
-				    GV_SETSTATE_FORCE);
+				p->state = GV_PLEX_GROWABLE;
 				break;
 			}
 		}
@@ -938,19 +957,14 @@
  * Return 1 if a > b, 0 otherwise.
  */
 int
-gv_drive_is_newer(struct gv_softc *sc)
+gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
 {
-	struct gv_drive *d, *d2;
+	struct gv_drive *d2;
 	struct timeval *a, *b;
 
 	KASSERT(!LIST_EMPTY(&sc->drives),
 	    ("gv_is_drive_newer: empty drive list"));
 
-	/*
-	 * We assume that the first drive on the list is the one to be compared
-	 * with the others.
-	 */
-	d = LIST_FIRST(&sc->drives);
 	a = &d->hdr->label.last_update;
 	LIST_FOREACH(d2, &sc->drives, drive) {
 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||

==== //depot/projects/soc2007/lulf/gvinum_fixup/sys/geom/vinum/geom_vinum_var.h#25 (text+ko) ====

@@ -312,7 +312,7 @@
 #define	GV_PLEX_DOWN		0
 #define	GV_PLEX_INITIALIZING	1
 #define	GV_PLEX_DEGRADED	2
-#define GV_PLEX_RESIZING	3
+#define GV_PLEX_GROWABLE	3
 #define	GV_PLEX_UP		4
 
 	int	org;			/* The plex organisation. */


More information about the p4-projects mailing list