[PATCH 01/31] Add takeover support for external meta

[PATCH 01/31] Add takeover support for external meta

am 09.11.2010 18:00:40 von adam.kwolek

When performing takeover 0->10 or 10->0 mdmon should update the external metadata (due to disk slot changes).
To achieve that, after changing the level in md, mdadm calls update_super with the "update_level" type.
update_super() allocates a new imsm_dev with updated disk slot numbers to be processed by mdmon in process_update().
process_update() discovers missing disks and adds them to imsm metadata.

Signed-off-by: Maciej Trela
Signed-off-by: Adam Kwolek
---

mdadm/mdadm/Grow.c | 14 ++
mdadm/mdadm/managemon.c | 15 ++
mdadm/mdadm/mdadm.h | 1
mdadm/mdadm/mdstat.c | 24 ++++
mdadm/mdadm/monitor.c | 2
mdadm/mdadm/super-intel.c | 304 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 355 insertions(+), 5 deletions(-)

diff --git a/mdadm/mdadm/Grow.c b/mdadm/mdadm/Grow.c index ae60f64..1054979 100644
--- a/mdadm/mdadm/Grow.c
+++ b/mdadm/mdadm/Grow.c
@@ -821,7 +821,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
st->update_tail = &st->updates;
}

- sra = sysfs_read(fd, 0, GET_LEVEL);
+ sra = sysfs_read(fd, 0, GET_VERSION | GET_LEVEL);
if (sra) {
if (st->ss->external && st->subarray[0] == 0) {
array.level = LEVEL_CONTAINER;
@@ -1003,6 +1003,18 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
fprintf(stderr, Name " level of %s changed to %s\n",
devname, c);
changed = 1;
+
+ st = super_by_fd(fd);
+ if (!st) {
+ fprintf(stderr, Name ": cannot handle this array\n");
+ return 1;
+ } else {
+ if (st && reshape_super(st, -1, level, UnSet, 0, 0, NULL, devname, !quiet)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
+ }
}
}

diff --git a/mdadm/mdadm/managemon.c b/mdadm/mdadm/managemon.c index 164e4f8..493c96e 100644
--- a/mdadm/mdadm/managemon.c
+++ b/mdadm/mdadm/managemon.c
@@ -381,6 +381,9 @@ static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone, static void manage_member(struct mdstat_ent *mdstat,
struct active_array *a)
{
+ struct active_array *newa;
+ int level;
+
/* Compare mdstat info with known state of member array.
* We do not need to look for device state changes here, that
* is dealt with by the monitor.
@@ -408,6 +411,16 @@ static void manage_member(struct mdstat_ent *mdstat,
else
frozen = 1; /* can't read metadata_version assume the worst */

+ level = mdstat_to_level(mdstat);
+ if (a->info.array.level != level && level >= 0) {
+ newa = duplicate_aa(a);
+ if (newa) {
+ newa->info.array.level = level;
+ replace_array(a->container, a, newa);
+ a = newa;
+ }
+ }
+
if (a->check_degraded && !frozen) {
struct metadata_update *updates = NULL;
struct mdinfo *newdev = NULL;
@@ -615,7 +628,7 @@ static void handle_message(struct supertype *container, struct metadata_update *

if (msg->len == 0) { /* ping_monitor */
int cnt;
-
+
cnt = monitor_loop_cnt;
if (cnt & 1)
cnt += 2; /* wait until next pselect */ diff --git a/mdadm/mdadm/mdadm.h b/mdadm/mdadm/mdadm.h index 64b32cc..fa800f9 100644
--- a/mdadm/mdadm/mdadm.h
+++ b/mdadm/mdadm/mdadm.h
@@ -373,6 +373,7 @@ extern void mdstat_wait(int seconds); extern void mdstat_wait_fd(int fd, const sigset_t *sigmask); extern int mddev_busy(int devnum); extern struct mdstat_ent *mdstat_by_component(char *name);
+int mdstat_to_level(struct mdstat_ent *ms);

struct map_ent {
struct map_ent *next;
diff --git a/mdadm/mdadm/mdstat.c b/mdadm/mdadm/mdstat.c index 47be2bb..47f54cb 100644
--- a/mdadm/mdadm/mdstat.c
+++ b/mdadm/mdadm/mdstat.c
@@ -282,6 +282,30 @@ struct mdstat_ent *mdstat_read(int hold, int start)
return rv;
}

+/*
+ * mdstat_to_level() - convert the personality name of an mdstat entry
+ * into a numeric RAID level.
+ *
+ * @ms: mdstat entry; its ->level string is matched by prefix.
+ *
+ * Returns 0, 1, 4, 5, 6 or 10 for the corresponding "raidN" names,
+ * LEVEL_LINEAR, LEVEL_FAULTY or LEVEL_MULTIPATH for those personalities,
+ * and LEVEL_UNSUPPORTED when the name is unknown or when no entry/level
+ * string is available.
+ */
+int mdstat_to_level(struct mdstat_ent *ms)
+{
+	/*
+	 * Matching is by prefix and the first hit wins, so the order of
+	 * this table matters: "raid10" has to be tried before "raid1".
+	 */
+	static const struct {
+		const char *name;
+		size_t len;
+		int level;
+	} levels[] = {
+		{ "raid0",     5, 0 },
+		{ "raid10",    6, 10 },
+		{ "raid1",     5, 1 },
+		{ "raid4",     5, 4 },
+		{ "raid5",     5, 5 },
+		{ "raid6",     5, 6 },
+		{ "linear",    6, LEVEL_LINEAR },
+		{ "faulty",    6, LEVEL_FAULTY },
+		{ "multipath", 9, LEVEL_MULTIPATH },
+	};
+	size_t i;
+
+	/* no entry or no personality string: nothing to translate */
+	if (!ms || !ms->level)
+		return LEVEL_UNSUPPORTED;
+
+	for (i = 0; i < sizeof(levels) / sizeof(levels[0]); i++)
+		if (strncmp(ms->level, levels[i].name, levels[i].len) == 0)
+			return levels[i].level;
+
+	return LEVEL_UNSUPPORTED;
+}
+
void mdstat_wait(int seconds)
{
fd_set fds;
diff --git a/mdadm/mdadm/monitor.c b/mdadm/mdadm/monitor.c index 23ff9ab..46c044e 100644
--- a/mdadm/mdadm/monitor.c
+++ b/mdadm/mdadm/monitor.c
@@ -487,7 +487,7 @@ static int wait_and_act(struct supertype *container, int nowait)
/* once an array has been deactivated we want to
* ask the manager to discard it.
*/
- if (!a->container) {
+ if (!a->container || a->info.array.level == 0) {
if (discard_this) {
ap = &(*ap)->next;
continue;
diff --git a/mdadm/mdadm/super-intel.c b/mdadm/mdadm/super-intel.c index 7c5fcc4..5caa8fa 100644
--- a/mdadm/mdadm/super-intel.c
+++ b/mdadm/mdadm/super-intel.c
@@ -285,6 +285,7 @@ enum imsm_update_type {
update_kill_array,
update_rename_array,
update_add_disk,
+ update_level,
};

struct imsm_update_activate_spare {
@@ -320,6 +321,13 @@ struct imsm_update_add_disk {
enum imsm_update_type type;
};

+struct imsm_update_level { /* payload of an update_level metadata update (raid0 <-> raid10 takeover) */
+ enum imsm_update_type type; /* set to update_level by update_level_imsm() */
+ int delta_disks; /* new raid_disks minus the map's current num_members; process_update adds it to mpb->num_disks */
+ int container_member; /* copied from info->container_member; prepare_update matches it against a->info.container_member */
+ struct imsm_dev dev; /* copy of the volume with new level, member count and ord table; kept last because the buffer is allocated with extra __u32 slots after it */
+};
+
static struct supertype *match_metadata_desc_imsm(char *arg) {
struct supertype *st;
@@ -1666,6 +1674,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
}
}

+/* forward declarations: both helpers are defined further down in this file */
+static int is_raid_level_supported(const struct imsm_orom *orom,
+				    int level, int raiddisks);
+static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src);
+
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
char *update, char *devname, int verbose,
int uuid_set, char *homehost)
@@ -1698,12 +1709,15 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
struct intel_super *super = st->sb;
struct imsm_super *mpb;

- /* we can only update container info */
- if (!super || super->current_vol >= 0 || !super->anchor)
+ if (!super || !super->anchor)
return 1;

mpb = super->anchor;

+ /* we can only update container info */
+ if (super->current_vol >= 0)
+ return 1;
+
if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
fprintf(stderr,
Name ": '--uuid' not supported for imsm metadata\n"); @@ -1778,6 +1792,45 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
memcpy(dest, src, sizeof_imsm_dev(src, 0)); }

+/*
+ * reallocate_imsm_dev() - replace the imsm_dev buffer of one volume with a
+ * larger one, preserving its current contents.
+ *
+ * @super:           container metadata whose devlist is searched
+ * @array_index:     intel_dev index of the volume to reallocate
+ * @map_num_members: number of members the new buffer must have room for
+ *
+ * The new buffer has room for an imsm_dev carrying two maps of
+ * @map_num_members entries each, and is never smaller than the device it
+ * replaces, because imsm_copy_dev() copies sizeof_imsm_dev(src, 0) bytes.
+ * Bytes beyond the copied device are zeroed.
+ *
+ * Returns the new imsm_dev (now owned by the devlist entry; the old buffer
+ * has been freed), or NULL if @super is NULL, @map_num_members is not
+ * positive, the volume is not found, or memory could not be allocated.
+ */
+struct imsm_dev *reallocate_imsm_dev(struct intel_super *super,
+				     unsigned int array_index,
+				     int map_num_members)
+{
+	struct intel_dev *dv;
+	struct imsm_dev *newdev;
+	size_t needed;
+	size_t current;
+
+	if (!super || map_num_members < 1)
+		return NULL;
+
+	/* Find our device first so nothing is allocated for a bad index */
+	for (dv = super->devlist; dv; dv = dv->next)
+		if (dv->index == array_index)
+			break;
+	if (!dv)
+		return NULL;
+
+	/* Calculate space needed for imsm_dev with a double map */
+	needed = sizeof(struct imsm_dev) + sizeof(struct imsm_map) +
+		 2 * sizeof(__u32) * (size_t)(map_num_members - 1);
+
+	/* never shrink: the copy below moves the whole existing device */
+	current = sizeof_imsm_dev(dv->dev, 0);
+	if (needed < current)
+		needed = current;
+
+	newdev = calloc(1, needed);
+	if (!newdev) {
+		fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n");
+		return NULL;
+	}
+
+	/* Copy imsm_dev into the new buffer and swap it into the list */
+	imsm_copy_dev(newdev, dv->dev);
+	free(dv->dev);
+	dv->dev = newdev;
+
+	return newdev;
+}
+
static int compare_super_imsm(struct supertype *st, struct supertype *tst) {
/*
@@ -5123,6 +5176,57 @@ static void imsm_process_update(struct supertype *st,
mpb = super->anchor;

switch (type) {
+ case update_level: {
+ struct imsm_update_level *u = (void *)update->buf;
+ struct imsm_dev *dev_new, *dev = NULL;
+ struct imsm_map *map;
+ struct dl *d;
+ int i;
+ int start_disk;
+
+ dev_new = &u->dev;
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ dev = get_imsm_dev(super, i);
+ if (strcmp((char *)dev_new->volume, (char *)dev->volume) == 0)
+ break;
+ }
+ if (i == super->anchor->num_raid_devs)
+ return;
+
+ if (dev == NULL)
+ return;
+
+ imsm_copy_dev(dev, dev_new);
+ map = get_imsm_map(dev, 0);
+ start_disk = mpb->num_disks;
+ mpb->num_disks += u->delta_disks;
+
+ /* clear missing disks list */
+ while (super->missing) {
+ d = super->missing;
+ super->missing = d->next;
+ __free_imsm_disk(d);
+ }
+ find_missing(super);
+
+ /* clear new disk entries if number of disks increased*/
+ d = super->missing;
+ for (i = start_disk; i < map->num_members; i++) {
+ assert(d != NULL);
+ if (!d)
+ break;
+ memset(&d->disk, 0, sizeof(d->disk));
+ strcpy((char *)d->disk.serial, "MISSING");
+ d->disk.total_blocks = map->blocks_per_member;
+ /* Set slot for missing disk */
+ set_imsm_ord_tbl_ent(map, i, d->index | IMSM_ORD_REBUILD);
+ d->raiddisk = i;
+ d = d->next;
+ }
+
+ super->updates_pending++;
+ break;
+ }
case update_activate_spare: {
struct imsm_update_activate_spare *u = (void *) update->buf;
struct imsm_dev *dev = get_imsm_dev(super, u->array); @@ -5442,6 +5546,26 @@ static void imsm_prepare_update(struct supertype *st,
size_t len = 0;

switch (type) {
+ case update_level: {
+ struct imsm_update_level *u = (void *) update->buf;
+ struct active_array *a;
+
+ dprintf("prepare_update(): update level\n");
+ len += u->delta_disks * sizeof(struct imsm_disk) +
+ u->delta_disks * sizeof(__u32);
+
+ for (a = st->arrays; a; a = a->next)
+ if (a->info.container_member == u->container_member)
+ break;
+ if (a == NULL)
+ break; /* what else we can do here? */
+
+ /* we'll add new disks to imsm_dev */
+ if (u->delta_disks > 0)
+ reallocate_imsm_dev(super, u->container_member,
+ a->info.array.raid_disks);
+ break;
+ }
case update_create_array: {
struct imsm_update_create_array *u = (void *) update->buf;
struct intel_dev *dv;
@@ -5561,6 +5685,181 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind } #endif /* MDASSEMBLE */

+/*
+ * update_level_imsm() - queue an update_level metadata update describing a
+ * raid0 <-> raid10 takeover of one volume.
+ *
+ * @st:      supertype whose ->sb holds the loaded imsm metadata
+ * @info:    array.level and array.raid_disks give the target geometry,
+ *           ->devs lists the member devices with their raid_disk slots
+ * @devname: volume name, compared against imsm_dev.volume
+ * @verbose, @uuid_set, @homehost: not used
+ *
+ * A copy of the volume's imsm_dev is built with the ord table reordered to
+ * the slots found in @info->devs (slots with no matching device are set to
+ * -1), the new level and the new member count, and is passed to
+ * append_metadata_update().
+ *
+ * Returns 0 when the update was queued, 1 when nothing was queued (bad
+ * arguments, unsupported level, volume not found, transition other than
+ * 0->10 / 10->0, or out of memory).
+ */
+static int update_level_imsm(struct supertype *st, struct mdinfo *info,
+			     char *devname, int verbose,
+			     int uuid_set, char *homehost)
+{
+	struct intel_super *super;
+	struct imsm_super *mpb;
+	struct imsm_update_level *u;
+	struct imsm_dev *dev_new, *dev = NULL;
+	struct imsm_map *map_new, *map;
+	struct mdinfo *newdi;
+	struct dl *dl;
+	int *tmp_ord_tbl;
+	int i, slot, raid_disk;
+	int len, min_len, disks;
+
+	if (!st || !info || !devname)
+		return 1;
+
+	super = st->sb;
+	if (!super || !super->anchor)
+		return 1;
+	mpb = super->anchor;
+
+	if (!is_raid_level_supported(super->orom,
+				     info->array.level,
+				     info->array.raid_disks))
+		return 1;
+
+	for (i = 0; i < mpb->num_raid_devs; i++) {
+		dev = get_imsm_dev(super, i);
+		if (dev && strcmp(devname, (char *)dev->volume) == 0)
+			break;
+	}
+	/* container has no volumes, or none carries this name */
+	if (dev == NULL || i == mpb->num_raid_devs)
+		return 1;
+
+	map = get_imsm_map(dev, 0);
+
+	/* update level is needed only for 0->10 and 10->0 transitions */
+	if (!((info->array.level == 10 && map->raid_level == 0) ||
+	      (info->array.level == 0 && map->raid_level == 10)))
+		return 1;
+
+	/* the ord table must hold the larger of the old and new disk counts */
+	disks = (info->array.raid_disks > map->num_members) ?
+		info->array.raid_disks : map->num_members;
+	if (disks < 1)
+		return 1;
+
+	len = (int)(sizeof(struct imsm_update_level) +
+		    ((disks - 1) * sizeof(__u32)));
+	/*
+	 * imsm_copy_dev() copies sizeof_imsm_dev(dev, 0) bytes, so the
+	 * embedded device must never be smaller than the one it is copied
+	 * from.
+	 */
+	min_len = (int)(sizeof(*u) - sizeof(u->dev) + sizeof_imsm_dev(dev, 0));
+	if (len < min_len)
+		len = min_len;
+
+	u = calloc(1, len);
+	if (u == NULL)
+		return 1;
+
+	dev_new = &u->dev;
+	imsm_copy_dev(dev_new, dev);
+	map_new = get_imsm_map(dev_new, 0);
+
+	tmp_ord_tbl = malloc(disks * sizeof(*tmp_ord_tbl));
+	if (tmp_ord_tbl == NULL) {
+		free(u);
+		return 1;
+	}
+
+	for (i = 0; i < disks; i++)
+		tmp_ord_tbl[i] = -1;
+
+	/* iterate through devices to detect slot changes */
+	for (dl = super->disks; dl; dl = dl->next)
+		for (newdi = info->devs; newdi; newdi = newdi->next) {
+			if ((dl->major != newdi->disk.major) ||
+			    (dl->minor != newdi->disk.minor))
+				continue;
+			slot = get_imsm_disk_slot(map, dl->index);
+			raid_disk = newdi->disk.raid_disk;
+			/*
+			 * Skip disks without a valid slot in the current map
+			 * or with a target slot outside the table, instead of
+			 * indexing out of bounds.
+			 */
+			if (slot >= 0 && raid_disk >= 0 && raid_disk < disks)
+				tmp_ord_tbl[raid_disk] =
+					get_imsm_ord_tbl_ent(dev_new, slot);
+			break;
+		}
+
+	for (i = 0; i < disks; i++)
+		set_imsm_ord_tbl_ent(map_new, i, tmp_ord_tbl[i]);
+	free(tmp_ord_tbl);
+
+	map_new->raid_level = info->array.level;
+	map_new->num_members = info->array.raid_disks;
+	u->type = update_level;
+	u->delta_disks = info->array.raid_disks - map->num_members;
+	u->container_member = info->container_member;
+	append_metadata_update(st, u, len);
+
+	return 0;
+}
+
+
+/*
+ * imsm_reshape_super() - reshape_super handler for imsm; only a pure
+ * level change (takeover) is acted upon.
+ *
+ * @st:        supertype of the member array; st->devnum names the device
+ * @size, @layout, @raid_disks: must be -1, UnSet and 0 respectively for
+ *             the request to be treated as a takeover
+ * @level:     requested level; must not be UnSet
+ * @chunksize, @backup, @dev, @verbouse: not used
+ *
+ * For a takeover the container metadata is loaded and, when the level
+ * test below passes, an update_level update is queued through
+ * update_level_imsm(). When the level reported after getinfo_super() is
+ * 0, mdmon is started if it is not running and the monitor is pinged.
+ *
+ * Returns 0 when the takeover branch was taken (including the case where
+ * update_level_imsm() decided that no update is needed), 1 otherwise.
+ */
+int imsm_reshape_super(struct supertype *st, long long size, int level,
+		       int layout, int chunksize, int raid_disks,
+		       char *backup, char *dev, int verbouse)
+{
+	int ret_val = 1;
+	struct mdinfo *sra = NULL;
+	int fd = -1;
+	int container_fd;
+	int dn;
+	int err;
+	char buf[PATH_MAX];
+	char *devname;
+
+	devname = devnum2devname(st->devnum);
+	if (devname == NULL) {
+		dprintf("imsm: Error: imsm_reshape_super(): cannot get device name.\n");
+		return ret_val;
+	}
+
+	snprintf(buf, sizeof(buf), "/dev/%s", devname);
+	fd = open(buf, O_RDONLY | O_DIRECT);
+	if (fd < 0) {
+		dprintf("imsm: cannot open device\n");
+		goto imsm_reshape_super_exit;
+	}
+
+	/* only a pure level change is a takeover; anything else is left alone */
+	if (size != -1 || layout != UnSet || raid_disks != 0 || level == UnSet)
+		goto imsm_reshape_super_exit;
+
+	sra = sysfs_read(fd, 0, GET_VERSION | GET_LEVEL |
+			 GET_LAYOUT | GET_DISKS | GET_DEVS);
+	if (sra == NULL) {
+		fprintf(stderr, Name ": Cannot read sysfs info (imsm)\n");
+		goto imsm_reshape_super_exit;
+	}
+
+	/* the first character of text_version is skipped; presumably it is
+	 * the '/' in front of the container name - TODO confirm */
+	dn = devname2devnum(sra->text_version + 1);
+	container_fd = open_dev_excl(dn);
+	if (container_fd < 0) {
+		fprintf(stderr, Name ": Cannot get exclusive access "
+			"to container (imsm).\n");
+		goto imsm_reshape_super_exit;
+	}
+
+	/* do not go on with stale or missing metadata if loading failed
+	 * (assumes load_super returns 0 on success, as elsewhere in mdadm) */
+	err = st->ss->load_super(st, container_fd, NULL);
+	close(container_fd);
+	if (err != 0) {
+		fprintf(stderr, Name ": Cannot load container metadata (imsm).\n");
+		goto imsm_reshape_super_exit;
+	}
+	st->ss->getinfo_super(st, sra);
+
+	/* send a metadata update for a raid10 takeover, i.e. when going
+	 * from/to raid10 to/from a different level;
+	 * if the source level is raid0, mdmon is only started.
+	 * NOTE(review): "level > 0" excludes a 10->0 request, although the
+	 * patch description lists that transition - confirm this is intended.
+	 */
+	if (((level == 10) || (sra->array.level == 10) || (sra->array.level == 0)) &&
+	    (level != sra->array.level) &&
+	    (level > 0)) {
+		st->update_tail = &st->updates;
+		err = update_level_imsm(st, sra, sra->name, 0, 0, NULL);
+		if (err != 0) {
+			/* no need to perform any updates with mdmon */
+			ret_val = 0;
+			goto imsm_reshape_super_exit;
+		}
+		/* if raid0 was taken over by any other
+		 * personality start mdmon */
+		if (sra->array.level == 0) {
+			char *dname = devnum2devname(dn);
+
+			if (!mdmon_running(dn))
+				start_mdmon(dn);
+			if (dname) {
+				ping_monitor(dname);
+				free(dname);
+			}
+		}
+		ret_val = 0;
+	}
+
+imsm_reshape_super_exit:
+	/* sra is still NULL on the early exits; the original code already
+	 * called sysfs_free() on a NULL pointer on its success path */
+	sysfs_free(sra);
+	if (fd >= 0)
+		close(fd);
+	free(devname);
+
+	dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
+	return ret_val;
+}
+
struct superswitch super_imsm = {
#ifndef MDASSEMBLE
.examine_super = examine_super_imsm,
@@ -5592,6 +5891,7 @@ struct superswitch super_imsm = {
.match_metadata_desc = match_metadata_desc_imsm,
.container_content = container_content_imsm,
.default_geometry = default_geometry_imsm,
+ .reshape_super = imsm_reshape_super,

.external = 1,
.name = "imsm",

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html