svn commit: r219752 - projects/graid/head/sys/geom/raid
Alexander Motin
mav at FreeBSD.org
Fri Mar 18 17:49:12 UTC 2011
Author: mav
Date: Fri Mar 18 17:49:11 2011
New Revision: 219752
URL: http://svn.freebsd.org/changeset/base/219752
Log:
Set of Promise MD improvements:
- add support for identifying SPARE drives;
- add support for reading and writing degraded states;
- fix detach handling;
- fix RAID1 writing;
Modified:
projects/graid/head/sys/geom/raid/md_promise.c
Modified: projects/graid/head/sys/geom/raid/md_promise.c
==============================================================================
--- projects/graid/head/sys/geom/raid/md_promise.c Fri Mar 18 17:23:23 2011 (r219751)
+++ projects/graid/head/sys/geom/raid/md_promise.c Fri Mar 18 17:49:11 2011 (r219752)
@@ -45,7 +45,7 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata");
#define PROMISE_MAX_DISKS 8
-#define PROMISE_MAX_SUBDISKS 4
+#define PROMISE_MAX_SUBDISKS 2
#define PROMISE_META_OFFSET 14
struct promise_raid_disk {
@@ -93,6 +93,7 @@ struct promise_raid_conf {
#define PROMISE_S_READY 0x08
#define PROMISE_S_DEGRADED 0x10
#define PROMISE_S_MARKED 0x20
+#define PROMISE_S_MIGRATING 0x40
#define PROMISE_S_FUNCTIONAL 0x80
uint8_t type; /* Volume type. */
@@ -237,6 +238,20 @@ promise_meta_find_disk(struct promise_ra
return (-1);
}
+static int
+promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos)
+{
+ int disk_pos, width;
+
+ if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
+ width = vol->v_disks_count / 2;
+ disk_pos = (md_disk_pos / width) +
+ (md_disk_pos % width) * width;
+ } else
+ disk_pos = md_disk_pos;
+ return (disk_pos);
+}
+
static void
promise_meta_get_name(struct promise_raid_conf *meta, char *buf)
{
@@ -310,7 +325,7 @@ next:
return (subdisks);
}
- if (meta->total_disks < 1 || meta->total_disks > PROMISE_MAX_DISKS) {
+ if (meta->total_disks > PROMISE_MAX_DISKS) {
G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)",
pp->name, meta->total_disks);
free(meta, M_MD_PROMISE);
@@ -482,7 +497,7 @@ g_raid_md_promise_start_disk(struct g_ra
struct g_raid_md_promise_perdisk *pd, *oldpd;
struct g_raid_md_promise_pervolume *pv;
struct promise_raid_conf *meta;
- int disk_pos, resurrection = 0;
+ int disk_pos, md_disk_pos, resurrection = 0;
sc = disk->d_softc;
md = sc->sc_md;
@@ -497,14 +512,16 @@ g_raid_md_promise_start_disk(struct g_ra
meta = pv->pv_meta;
/* Find disk position in metadata by its serial. */
- disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id);
+ md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id);
+ /* For RAID10 we need to translate order. */
+ disk_pos = promise_meta_translate_disk(vol, md_disk_pos);
if (disk_pos < 0) {
G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk");
/* Failed stale disk is useless for us. */
-// if (pd->pd_disk_meta.flags & PROMISE_F_FAILED) {
-// g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
-// return (0);
-// }
+ if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) {
+ g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
+ return (0);
+ }
/* If we are in the start process, that's all for now. */
if (!pv->pv_started)
goto nofit;
@@ -541,17 +558,15 @@ g_raid_md_promise_start_disk(struct g_ra
#endif
if (olddisk == NULL) {
nofit:
-#if 0
- if (pd->pd_disk_meta.flags & PROMISE_F_SPARE) {
+ if (pd->pd_meta[sdn]->disk.flags & PROMISE_F_SPARE) {
g_raid_change_disk_state(disk,
G_RAID_DISK_S_SPARE);
return (1);
} else {
-#endif
g_raid_change_disk_state(disk,
G_RAID_DISK_S_STALE);
return (0);
-// }
+ }
}
oldpd = (struct g_raid_md_promise_perdisk *)olddisk->d_md_data;
// disk_pos = oldpd->pd_disk_pos;
@@ -598,116 +613,50 @@ nofit:
/* Welcome the new disk. */
if (resurrection)
g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
-/* else if (meta->disk[disk_pos].flags & PROMISE_F_FAILED)
+ else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN)
g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
- else if (meta->disk[disk_pos].flags & PROMISE_F_SPARE)
- g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
-*/ else
+ else
g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
sd->sd_offset = (off_t)pd->pd_meta[sdn]->disk_offset * 512;
sd->sd_size = (off_t)pd->pd_meta[sdn]->disk_sectors * 512;
- /* Up to date disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_ACTIVE);
- g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
- G_RAID_EVENT_SUBDISK);
-#if 0
- TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
- mvol = promise_get_volume(meta,
- (uintptr_t)(sd->sd_volume->v_md_data));
- mmap0 = promise_get_map(mvol, 0);
- if (mvol->migr_state)
- mmap1 = promise_get_map(mvol, 1);
- else
- mmap1 = mmap0;
-
- if (resurrection) {
- /* Stale disk, almost same as new. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_NEW);
- } else if (meta->disk[disk_pos].flags & PROMISE_F_FAILED) {
- /* Failed disk, almost useless. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_FAILED);
- } else if (mvol->migr_state == 0) {
- if (mmap0->status == PROMISE_S_UNINITIALIZED) {
- /* Freshly created uninitialized volume. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_UNINITIALIZED);
- } else if (mmap0->disk_idx[sd->sd_pos] & PROMISE_DI_RBLD) {
- /* Freshly inserted disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_NEW);
- } else if (mvol->dirty) {
- /* Dirty volume (unclean shutdown). */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_STALE);
- } else {
- /* Up to date disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_ACTIVE);
- }
- } else if (mvol->migr_type == PROMISE_MT_INIT ||
- mvol->migr_type == PROMISE_MT_REBUILD) {
- if (mmap0->disk_idx[sd->sd_pos] & PROMISE_DI_RBLD) {
- /* Freshly inserted disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_NEW);
- } else if (mmap1->disk_idx[sd->sd_pos] & PROMISE_DI_RBLD) {
- /* Rebuilding disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_REBUILD);
- if (mvol->dirty) {
- sd->sd_rebuild_pos = 0;
- } else {
- sd->sd_rebuild_pos =
- (off_t)mvol->curr_migr_unit *
- sd->sd_volume->v_strip_size *
- mmap0->total_domains;
- }
- } else if (mvol->dirty) {
- /* Dirty volume (unclean shutdown). */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_STALE);
- } else {
- /* Up to date disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_ACTIVE);
- }
- } else if (mvol->migr_type == PROMISE_MT_VERIFY ||
- mvol->migr_type == PROMISE_MT_REPAIR) {
- if (mmap0->disk_idx[sd->sd_pos] & PROMISE_DI_RBLD) {
- /* Freshly inserted disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_NEW);
- } else if (mmap1->disk_idx[sd->sd_pos] & PROMISE_DI_RBLD) {
- /* Resyncing disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_RESYNC);
- if (mvol->dirty) {
- sd->sd_rebuild_pos = 0;
- } else {
- sd->sd_rebuild_pos =
- (off_t)mvol->curr_migr_unit *
- sd->sd_volume->v_strip_size *
- mmap0->total_domains;
- }
- } else if (mvol->dirty) {
- /* Dirty volume (unclean shutdown). */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_STALE);
- } else {
- /* Up to date disk. */
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_ACTIVE);
- }
+ if (resurrection) {
+ /* Stale disk, almost same as new. */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_NEW);
+ } else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) {
+ /* Failed disk. */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_FAILED);
+ } else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) {
+ /* Rebuilding disk. */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_REBUILD);
+ if (pd->pd_meta[sdn]->generation != meta->generation)
+ sd->sd_rebuild_pos = 0;
+ else {
+ sd->sd_rebuild_pos =
+ pd->pd_meta[sdn]->rebuild_lba * 512;
}
- g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
- G_RAID_EVENT_SUBDISK);
+ } else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) {
+ /* Rebuilding disk. */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_NEW);
+ } else if (pd->pd_meta[sdn]->generation != meta->generation ||
+ (meta->status & PROMISE_S_MARKED)) {
+ /* Stale disk or dirty volume (unclean shutdown). */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_STALE);
+ } else {
+ /* Up to date disk. */
+ g_raid_change_subdisk_state(sd,
+ G_RAID_SUBDISK_S_ACTIVE);
}
+ g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
+ G_RAID_EVENT_SUBDISK);
+#if 0
/* Update status of our need for spare. */
if (mdi->mdio_started) {
mdi->mdio_incomplete =
@@ -906,6 +855,14 @@ g_raid_md_promise_new_disk(struct g_raid
for (i = 0; i < pd->pd_subdisks; i++) {
pdmeta = pd->pd_meta[i];
+ if (pdmeta->disk.number == 0xff) {
+ if (pdmeta->disk.flags & PROMISE_F_SPARE) {
+ g_raid_change_disk_state(disk,
+ G_RAID_DISK_S_SPARE);
+ }
+ continue;
+ }
+
/* Look for volume with matching ID. */
vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
if (vol == NULL) {
@@ -949,6 +906,8 @@ g_raid_md_promise_new_disk(struct g_raid
/* Look for volume with matching ID. */
vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
+ if (vol == NULL)
+ continue;
pv = vol->v_md_data;
if (pv->pv_started) {
@@ -1116,7 +1075,6 @@ g_raid_md_event_promise(struct g_raid_md
struct g_raid_disk *disk, u_int event)
{
struct g_raid_softc *sc;
- struct g_raid_subdisk *sd;
struct g_raid_md_promise_object *mdi;
struct g_raid_md_promise_perdisk *pd;
@@ -1127,24 +1085,9 @@ g_raid_md_event_promise(struct g_raid_md
pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
switch (event) {
case G_RAID_DISK_E_DISCONNECTED:
- /* If disk was assigned, just update statuses. */
- if (pd->pd_subdisks >= 0) {
- g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE);
- if (disk->d_consumer) {
- g_raid_kill_consumer(sc, disk->d_consumer);
- disk->d_consumer = NULL;
- }
- TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
- g_raid_change_subdisk_state(sd,
- G_RAID_SUBDISK_S_NONE);
- g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED,
- G_RAID_EVENT_SUBDISK);
- }
- } else {
- /* Otherwise -- delete. */
- g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
- g_raid_destroy_disk(disk);
- }
+ /* Delete disk. */
+ g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
+ g_raid_destroy_disk(disk);
/* Write updated metadata to all disks. */
g_raid_md_write_promise(md, NULL, NULL, NULL);
@@ -1810,7 +1753,7 @@ g_raid_md_write_promise(struct g_raid_md
struct g_raid_md_promise_perdisk *pd;
struct g_raid_md_promise_pervolume *pv;
struct promise_raid_conf *meta;
- int i, j;
+ int i, j, pos;
sc = md->mdo_softc;
mdi = (struct g_raid_md_promise_object *)md;
@@ -1848,6 +1791,8 @@ g_raid_md_write_promise(struct g_raid_md
pv->pv_generation++;
meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO);
+ if (pv->pv_meta != NULL)
+ memcpy(meta, pv->pv_meta, sizeof(*meta));
memcpy(meta->promise_id, PROMISE_MAGIC, sizeof(PROMISE_MAGIC));
meta->dummy_0 = 0x00020000;
meta->integrity = PROMISE_I_VALID;
@@ -1855,6 +1800,10 @@ g_raid_md_write_promise(struct g_raid_md
meta->generation = pv->pv_generation;
meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE |
PROMISE_S_INITED | PROMISE_S_READY;
+ if (vol->v_state < G_RAID_VOLUME_S_OPTIMAL)
+ meta->status |= PROMISE_S_DEGRADED;
+ if (vol->v_dirty)
+ meta->status |= PROMISE_S_MARKED;
if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 ||
vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE)
meta->type = PROMISE_T_RAID0;
@@ -1872,12 +1821,13 @@ g_raid_md_write_promise(struct g_raid_md
meta->total_disks = vol->v_disks_count;
meta->stripe_shift = ffs(vol->v_strip_size / 1024);
meta->array_width = vol->v_disks_count;
- if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
+ if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
+ vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
meta->array_width /= 2;
if (pv->pv_meta != NULL)
meta->array_number = pv->pv_meta->array_number;
meta->total_sectors = vol->v_mediasize / vol->v_sectorsize;
- meta->cylinders = meta->total_sectors / (254 * 63);
+ meta->cylinders = meta->total_sectors / (255 * 63) - 1;
meta->heads = 254;
meta->sectors = 63;
if (pv->pv_meta != NULL)
@@ -1885,14 +1835,27 @@ g_raid_md_write_promise(struct g_raid_md
else
arc4rand(&meta->volume_id, sizeof(meta->volume_id), 0);
for (i = 0; i < vol->v_disks_count; i++) {
- meta->disks[i].flags = PROMISE_F_VALID |
- PROMISE_F_ONLINE | PROMISE_F_ASSIGNED;
- meta->disks[i].number = i;
+ sd = &vol->v_subdisks[i];
+ /* For RAID10 we need to translate order. */
+ pos = promise_meta_translate_disk(vol, i);
+ meta->disks[pos].flags = PROMISE_F_VALID |
+ PROMISE_F_ASSIGNED;
+ if (sd->sd_state == G_RAID_SUBDISK_S_NONE) {
+ meta->disks[pos].flags |= 0;
+ } else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) {
+ meta->disks[pos].flags |=
+ PROMISE_F_DOWN | PROMISE_F_REDIR;
+ } else if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) {
+ meta->disks[pos].flags |=
+ PROMISE_F_ONLINE | PROMISE_F_REDIR;
+ } else
+ meta->disks[pos].flags |= PROMISE_F_ONLINE;
if (pv->pv_meta != NULL) {
- meta->disks[i].id = pv->pv_meta->disks[i].id;
+ meta->disks[pos].id = pv->pv_meta->disks[pos].id;
} else {
- arc4rand(&meta->disks[i].id,
- sizeof(meta->disks[i].id), 0);
+ meta->disks[pos].number = i * 2;
+ arc4rand(&meta->disks[pos].id,
+ sizeof(meta->disks[pos].id), 0);
}
}
promise_meta_put_name(meta, vol->v_name);
@@ -1906,6 +1869,8 @@ g_raid_md_write_promise(struct g_raid_md
disk = sd->sd_disk;
if (disk == NULL)
continue;
+ /* For RAID10 we need to translate order. */
+ pos = promise_meta_translate_disk(vol, i);
pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
for (j = 0; j < pd->pd_subdisks; j++) {
if (pd->pd_meta[j]->volume_id == meta->volume_id)
@@ -1916,16 +1881,23 @@ g_raid_md_write_promise(struct g_raid_md
if (pd->pd_meta[j] != NULL)
free(pd->pd_meta[j], M_MD_PROMISE);
pd->pd_meta[j] = promise_meta_copy(meta);
- pd->pd_meta[j]->disk = meta->disks[i];
+ pd->pd_meta[j]->disk = meta->disks[pos];
+ pd->pd_meta[j]->disk.number = pos;
pd->pd_meta[j]->disk_offset = sd->sd_offset / 512;
pd->pd_meta[j]->disk_sectors = sd->sd_size / 512;
- pd->pd_meta[j]->rebuild_lba = sd->sd_rebuild_pos / 512;
+ if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
+ pd->pd_meta[j]->rebuild_lba =
+ sd->sd_rebuild_pos / 512;
+ } else
+ pd->pd_meta[j]->rebuild_lba = 0;
pd->pd_updated = 1;
}
}
TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
+ if (disk->d_state != G_RAID_DISK_S_ACTIVE)
+ continue;
if (!pd->pd_updated)
continue;
G_RAID_DEBUG(1, "Writing Promise metadata to %s",
More information about the svn-src-projects
mailing list