svn commit: r217565 - projects/graid/head/sys/geom/raid
Alexander Motin
mav at FreeBSD.org
Tue Jan 18 23:35:09 UTC 2011
Author: mav
Date: Tue Jan 18 23:35:08 2011
New Revision: 217565
URL: http://svn.freebsd.org/changeset/base/217565
Log:
Add spare disk support. A disk is counted as spare if its metadata says so,
or if kern.geom.raid.aggressive_spare sysctl/tunable is set, disk connected
to Intel controller and has no Intel metadata.
Disks are marked as spare by `graid insert ...` when the array is already full.
Unfortunately, since Intel treats spare disks as "global spares", they
can't be seen via `graid list/status` at this moment, because they do not
belong to any node/array and are opened only when needed.
Modified:
projects/graid/head/sys/geom/raid/g_raid.h
projects/graid/head/sys/geom/raid/md_intel.c
Modified: projects/graid/head/sys/geom/raid/g_raid.h
==============================================================================
--- projects/graid/head/sys/geom/raid/g_raid.h Tue Jan 18 23:00:22 2011 (r217564)
+++ projects/graid/head/sys/geom/raid/g_raid.h Tue Jan 18 23:35:08 2011 (r217565)
@@ -51,6 +51,7 @@ struct g_raid_tr_object;
extern u_int g_raid_aggressive_spare;
extern u_int g_raid_debug;
extern u_int g_raid_start_timeout;
+extern struct g_class g_raid_class;
#define G_RAID_DEBUG(lvl, fmt, ...) do { \
if (g_raid_debug >= (lvl)) { \
Modified: projects/graid/head/sys/geom/raid/md_intel.c
==============================================================================
--- projects/graid/head/sys/geom/raid/md_intel.c Tue Jan 18 23:00:22 2011 (r217564)
+++ projects/graid/head/sys/geom/raid/md_intel.c Tue Jan 18 23:35:08 2011 (r217565)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
+#include <sys/taskqueue.h>
#include <geom/geom.h>
#include "geom/raid/g_raid.h"
#include "g_raid_md_if.h"
@@ -174,6 +175,7 @@ struct g_raid_md_intel_object {
struct callout mdio_start_co; /* STARTING state timer. */
int mdio_disks_present;
int mdio_started;
+ int mdio_incomplete;
struct root_hold_token *mdio_rootmount; /* Root mount delay token. */
};
@@ -451,6 +453,27 @@ intel_meta_erase(struct g_consumer *cp)
return (error);
}
+static int
+intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d)
+{
+ struct intel_raid_conf *meta;
+ int error;
+
+ /* Fill anchor and single disk. */
+ meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO);
+ memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC));
+ memcpy(&meta->version[0], INTEL_VERSION_1000,
+ sizeof(INTEL_VERSION_1000));
+ meta->config_size = INTEL_MAX_MD_SIZE(1);
+ meta->config_id = arc4random();
+ meta->generation = 1;
+ meta->total_disks = 1;
+ meta->disk[0] = *d;
+ error = intel_meta_write(cp, meta);
+ free(meta, M_MD_INTEL);
+ return (error);
+}
+
static struct g_raid_disk *
g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id)
{
@@ -508,10 +531,8 @@ g_raid_md_intel_start_disk(struct g_raid
return (0);
}
/* If we are in the start process, that's all for now. */
- if (!mdi->mdio_started) {
- g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
- return (0);
- }
+ if (!mdi->mdio_started)
+ goto nofit;
/* If we have already started - try to get use of the disk. */
TAILQ_FOREACH(olddisk, &sc->sc_disks, d_next) {
if (olddisk->d_state != G_RAID_DISK_S_OFFLINE &&
@@ -520,15 +541,31 @@ g_raid_md_intel_start_disk(struct g_raid
/* Make sure this disk is big enough. */
TAILQ_FOREACH(sd, &olddisk->d_subdisks, sd_next) {
if (sd->sd_offset + sd->sd_size + 4096 >
- pd->pd_disk_meta.sectors * 512) {
- continue;
+ (uint64_t)pd->pd_disk_meta.sectors * 512) {
+ G_RAID_DEBUG(1,
+ "Disk too small (%llu < %llu)",
+ ((unsigned long long)
+ pd->pd_disk_meta.sectors) * 512,
+ (unsigned long long)
+ sd->sd_offset + sd->sd_size + 4096);
+ break;
}
}
+ if (sd != NULL)
+ continue;
break;
}
if (olddisk == NULL) {
- g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE);
- return (0);
+nofit:
+ if (pd->pd_disk_meta.flags & INTEL_F_SPARE) {
+ g_raid_change_disk_state(disk,
+ G_RAID_DISK_S_SPARE);
+ return (1);
+ } else {
+ g_raid_change_disk_state(disk,
+ G_RAID_DISK_S_STALE);
+ return (0);
+ }
}
oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data;
disk_pos = oldpd->pd_disk_pos;
@@ -574,6 +611,8 @@ g_raid_md_intel_start_disk(struct g_raid
g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
else if (meta->disk[disk_pos].flags & INTEL_F_FAILED)
g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
+ else if (meta->disk[disk_pos].flags & INTEL_F_SPARE)
+ g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
else
g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
@@ -627,16 +666,34 @@ g_raid_md_intel_start_disk(struct g_raid
g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
G_RAID_EVENT_SUBDISK);
}
+
+ /* Update status of our need for spare. */
+ if (mdi->mdio_started) {
+ mdi->mdio_incomplete =
+ (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
+ meta->total_disks);
+ }
+
return (resurrection);
}
static void
+g_disk_md_intel_retaste(void *arg, int pending)
+{
+
+ G_RAID_DEBUG(1, "Array is not complete, trying to retaste.");
+ g_retaste(&g_raid_class);
+ free(arg, M_MD_INTEL);
+}
+
+static void
g_raid_md_intel_refill(struct g_raid_softc *sc)
{
struct g_raid_md_object *md;
struct g_raid_md_intel_object *mdi;
struct intel_raid_conf *meta;
struct g_raid_disk *disk;
+ struct task *task;
int update;
md = sc->sc_md;
@@ -649,7 +706,7 @@ g_raid_md_intel_refill(struct g_raid_sof
meta->total_disks)
break;
- G_RAID_DEBUG(1, "Array is not complete. trying to refill.");
+ G_RAID_DEBUG(1, "Array is not complete, trying to refill.");
/* Try to get use some of STALE disks. */
TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
@@ -675,6 +732,18 @@ g_raid_md_intel_refill(struct g_raid_sof
/* Write new metadata if we changed something. */
if (update)
g_raid_md_write_intel(md, NULL, NULL, NULL);
+
+ /* Update status of our need for spare. */
+ mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) <
+ meta->total_disks);
+
+ /* Request retaste hoping to find spare. */
+ if (mdi->mdio_incomplete) {
+ task = malloc(sizeof(struct task),
+ M_MD_INTEL, M_WAITOK | M_ZERO);
+ TASK_INIT(task, 0, g_disk_md_intel_retaste, task);
+ taskqueue_enqueue(taskqueue_swi, task);
+ }
}
static void
@@ -889,9 +958,10 @@ g_raid_md_taste_intel(struct g_raid_md_o
struct intel_raid_conf *meta;
struct g_raid_md_intel_perdisk *pd;
struct g_geom *geom;
- int error, disk_pos, result;
+ int error, disk_pos, result, spare, len;
char serial[INTEL_SERIAL_LEN];
char name[16];
+ uint16_t vendor;
G_RAID_DEBUG(1, "Tasting Intel on %s", cp->provider->name);
mdi = (struct g_raid_md_intel_object *)md;
@@ -899,6 +969,9 @@ g_raid_md_taste_intel(struct g_raid_md_o
/* Read metadata from device. */
meta = NULL;
+ spare = 0;
+ vendor = 0xffff;
+ disk_pos = 0;
if (g_access(cp, 1, 0, 0) != 0)
return (G_RAID_MD_TASTE_FAIL);
g_topology_unlock();
@@ -908,11 +981,27 @@ g_raid_md_taste_intel(struct g_raid_md_o
pp->name, error);
goto fail2;
}
+ len = 2;
+ if (pp->geom->rank == 1)
+ g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
meta = intel_meta_read(cp);
g_topology_lock();
g_access(cp, -1, 0, 0);
- if (meta == NULL)
+ if (meta == NULL) {
+ if (g_raid_aggressive_spare) {
+ if (vendor == 0x8086) {
+ G_RAID_DEBUG(1,
+ "No Intel metadata, forcing spare.");
+ spare = 2;
+ goto search;
+ } else {
+ G_RAID_DEBUG(1,
+ "Intel vendor mismatch 0x%04x != 0x8086",
+ vendor);
+ }
+ }
return (G_RAID_MD_TASTE_FAIL);
+ }
/* Check this disk position in obtained metadata. */
disk_pos = intel_meta_find_disk(meta, serial);
@@ -931,7 +1020,9 @@ g_raid_md_taste_intel(struct g_raid_md_o
/* Metadata valid. Print it. */
g_raid_md_intel_print(meta);
G_RAID_DEBUG(1, "Intel disk position %d", disk_pos);
+ spare = meta->disk[disk_pos].flags & INTEL_F_SPARE;
+search:
/* Search for matching node. */
sc = NULL;
mdi1 = NULL;
@@ -944,9 +1035,13 @@ g_raid_md_taste_intel(struct g_raid_md_o
if (sc->sc_md->mdo_class != md->mdo_class)
continue;
mdi1 = (struct g_raid_md_intel_object *)sc->sc_md;
- if (mdi1->mdio_config_id != meta->config_id)
- continue;
- break;
+ if (spare) {
+ if (mdi1->mdio_incomplete)
+ break;
+ } else {
+ if (mdi1->mdio_config_id == meta->config_id)
+ break;
+ }
}
/* Found matching node. */
@@ -954,7 +1049,11 @@ g_raid_md_taste_intel(struct g_raid_md_o
G_RAID_DEBUG(1, "Found matching node %s", sc->sc_name);
result = G_RAID_MD_TASTE_EXISTING;
- } else { /* Not found matching node. */
+ } else if (spare) { /* Not found needy node -- left for later. */
+ G_RAID_DEBUG(1, "Spare is not needed at this time");
+ goto fail1;
+
+ } else { /* Not found matching node -- create one. */
result = G_RAID_MD_TASTE_NEW;
mdi->mdio_config_id = meta->config_id;
snprintf(name, sizeof(name), "Intel-%08x", meta->config_id);
@@ -980,7 +1079,14 @@ g_raid_md_taste_intel(struct g_raid_md_o
pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO);
pd->pd_meta = meta;
pd->pd_disk_pos = -1;
- pd->pd_disk_meta = meta->disk[disk_pos];
+ if (spare == 2) {
+ memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
+ pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
+ pd->pd_disk_meta.id = 0;
+ pd->pd_disk_meta.flags = INTEL_F_SPARE;
+ } else {
+ pd->pd_disk_meta = meta->disk[disk_pos];
+ }
disk = g_raid_create_disk(sc);
disk->d_md_data = (void *)pd;
disk->d_consumer = rcp;
@@ -1392,11 +1498,14 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
INTEL_SERIAL_LEN);
pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
pd->pd_disk_meta.id = 0;
- pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE;
+ pd->pd_disk_meta.flags = INTEL_F_SPARE;
/* Welcome the "new" disk. */
update += g_raid_md_intel_start_disk(disk);
- if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
+ if (disk->d_state == G_RAID_DISK_S_SPARE) {
+ intel_meta_write_spare(cp, &pd->pd_disk_meta);
+ g_raid_destroy_disk(disk);
+ } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
gctl_error(req, "Disk '%s' doesn't fit.",
diskname);
g_raid_destroy_disk(disk);
@@ -1445,7 +1554,7 @@ g_raid_md_write_intel(struct g_raid_md_o
numdisks++;
if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
pd->pd_disk_meta.flags =
- INTEL_F_ASSIGNED | INTEL_F_ONLINE;
+ INTEL_F_ONLINE | INTEL_F_ASSIGNED;
} else if (disk->d_state == G_RAID_DISK_S_FAILED) {
pd->pd_disk_meta.flags = INTEL_F_FAILED | INTEL_F_ASSIGNED;
} else {
More information about the svn-src-projects
mailing list