[PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removeddisk from in imsm container.

[PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removeddisk from in imsm container.

am 07.12.2010 17:07:35 von Marcin.Labun

From 4bd19fb7b8a4258bf6cf34288be635bdb9af3dbe Mon Sep 17 00:00:00 2001
From: Marcin Labun
Date: Wed, 30 Nov 2010 03:55:18 +0100
Subject: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removed disk from in imsm container.

Manager thread shall pass the information to monitor thread (mdmon)
that some devices are removed from container. Otherwise, monitor (mdmon)
might use such devices (spares) to rebuild the array that has gone degraded.

This problem happens for imsm containers, since a list of the container disks
is maintained in intel_super structure. When array goes degraded, the list is
searched to find a spare disks to start rebuild.
Without this fix the rebuild could be stared on the spare device that was
a member of the container, but has been removed from it.

New super type function handler has been introduced to prepare metadata
format specific information about removed devices.
int (*remove_from_super)(struct supertype *st, mdu_disk_info_t *dinfo,
int fd);
The message prepared in remove_from_super is later processed
by proceess_update handler in monitor thread.

Signed-off-by: Marcin Labun
---
managemon.c | 37 ++++++++++++
mdadm.h | 7 ++-
super-intel.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 196 insertions(+), 31 deletions(-)

diff --git a/managemon.c b/managemon.c
index bab0397..61037d1 100644
--- a/managemon.c
+++ b/managemon.c
@@ -297,6 +297,42 @@ static void add_disk_to_container(struct supertype *st, struct mdinfo *sd)
st->update_tail = NULL;
}

+/*
+ * Create and queue update structure about the removed disks.
+ * The update is prepared by super type handler and passed to the monitor
+ * thread.
+ */
+static void remove_disk_from_container(struct supertype *st, struct mdinfo *sd)
+{
+ int dfd;
+ char nm[20];
+ struct metadata_update *update = NULL;
+ mdu_disk_info_t dk = {
+ .number = -1,
+ .major = sd->disk.major,
+ .minor = sd->disk.minor,
+ .raid_disk = -1,
+ .state = 0,
+ };
+ /* nothing to do if super type handler does not support
+ * remove disk primitive
+ */
+ if (!st->ss->remove_from_super)
+ return;
+ dprintf("%s: remove %d:%d from container\n",
+ __func__, sd->disk.major, sd->disk.minor);
+
+ sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+ /* accept that dev might be gone */
+ dfd = dev_open(nm, O_RDWR);
+
+ st->update_tail = &update;
+ st->ss->remove_from_super(st, &dk, dfd);
+ st->ss->write_init_super(st);
+ queue_metadata_update(update);
+ st->update_tail = NULL;
+}
+
static void manage_container(struct mdstat_ent *mdstat,
struct supertype *container)
{
@@ -334,6 +370,7 @@ static void manage_container(struct mdstat_ent *mdstat,
if (!found) {
cd = *cdp;
*cdp = (*cdp)->next;
+ remove_disk_from_container(container, cd);
free(cd);
} else
cdp = &(*cdp)->next;
diff --git a/mdadm.h b/mdadm.h
index b2d39c1..b188f79 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -597,7 +597,12 @@ extern struct superswitch {
* when hot-adding a spare.
*/
int (*add_to_super)(struct supertype *st, mdu_disk_info_t *dinfo,
- int fd, char *devname);
+ int fd, char *devname);
+ /* update the metadata to delete a device,
+ * when hot-removing a spare.
+ */
+ int (*remove_from_super)(struct supertype *st, mdu_disk_info_t *dinfo,
+ int fd);

/* Write metadata to one device when fixing problems or adding
* a new device.
diff --git a/super-intel.c b/super-intel.c
index 8ef6164..1cee6c1 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -233,6 +233,10 @@ struct intel_dev {
unsigned index;
};

+enum action {
+ DISK_REMOVE = 1,
+ DISK_ADD
+};
/* internal representation of IMSM metadata */
struct intel_super {
union {
@@ -258,8 +262,10 @@ struct intel_super {
int extent_cnt;
struct extent *e; /* for determining freespace @ create */
int raiddisk; /* slot to fill in autolayout */
+ enum action action;
} *disks;
- struct dl *add; /* list of disks to add while mdmon active */
+ struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
+ active */
struct dl *missing; /* disks removed while we weren't looking */
struct bbm_log *bbm_log;
const char *hba; /* device path of the raid controller for this metadata */
@@ -285,6 +291,7 @@ enum imsm_update_type {
update_kill_array,
update_rename_array,
update_add_disk,
+ update_add_remove_disk
};

struct imsm_update_activate_spare {
@@ -316,7 +323,7 @@ struct imsm_update_rename_array {
int dev_idx;
};

-struct imsm_update_add_disk {
+struct imsm_update_add_remove_disk {
enum imsm_update_type type;
};

@@ -2404,6 +2411,7 @@ static void __free_imsm_disk(struct dl *d)
free(d);

}
+
static void free_imsm_disks(struct intel_super *super)
{
struct dl *d;
@@ -3408,6 +3416,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
dd->devname = devname ? strdup(devname) : NULL;
dd->fd = fd;
dd->e = NULL;
+ dd->action = DISK_ADD;
rv = imsm_read_serial(fd, devname, dd->serial);
if (rv) {
fprintf(stderr,
@@ -3427,8 +3436,8 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
dd->disk.scsi_id = __cpu_to_le32(0);

if (st->update_tail) {
- dd->next = super->add;
- super->add = dd;
+ dd->next = super->disk_mgmt_list;
+ super->disk_mgmt_list = dd;
} else {
dd->next = super->disks;
super->disks = dd;
@@ -3437,6 +3446,45 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
return 0;
}

+
+static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
+ int fd)
+{
+ struct intel_super *super = st->sb;
+ struct dl *dd;
+
+ /* remove from super works only in mdmon - for communication
+ * manager - monitor. Check if communication memory buffer
+ * is prepared.
+ */
+ if (!st->update_tail) {
+ fprintf(stderr,
+ Name ": %s shall be used in mdmon context only"
+ "(line %d).\n", __func__, __LINE__);
+ return 1;
+ }
+ dd = malloc(sizeof(*dd));
+ if (!dd) {
+ fprintf(stderr,
+ Name ": malloc failed %s:%d.\n", __func__, __LINE__);
+ return 1;
+ }
+ memset(dd, 0, sizeof(*dd));
+ dd->major = dk->major;
+ dd->minor = dk->minor;
+ dd->index = -1;
+ dd->fd = fd;
+ dd->disk.status = SPARE_DISK;
+ dd->action = DISK_REMOVE;
+
+ if (st->update_tail) {
+ dd->next = super->disk_mgmt_list;
+ super->disk_mgmt_list = dd;
+ }
+
+ return 0;
+}
+
static int store_imsm_mpb(int fd, struct imsm_super *mpb);

static union {
@@ -3589,13 +3637,13 @@ static int create_array(struct supertype *st, int dev_idx)
return 0;
}

-static int _add_disk(struct supertype *st)
+static int mgmt_disk(struct supertype *st)
{
struct intel_super *super = st->sb;
size_t len;
- struct imsm_update_add_disk *u;
+ struct imsm_update_add_remove_disk *u;

- if (!super->add)
+ if (!super->disk_mgmt_list)
return 0;

len = sizeof(*u);
@@ -3606,7 +3654,7 @@ static int _add_disk(struct supertype *st)
return 1;
}

- u->type = update_add_disk;
+ u->type = update_add_remove_disk;
append_metadata_update(st, u, len);

return 0;
@@ -3628,10 +3676,10 @@ static int write_init_super_imsm(struct supertype *st)

/* determine if we are creating a volume or adding a disk */
if (current_vol < 0) {
- /* in the add disk case we are running in mdmon
- * context, so don't close fd's
+ /* in the mgmt (add/remove) disk case we are running
+ * in mdmon context, so don't close fd's
*/
- return _add_disk(st);
+ return mgmt_disk(st);
} else
rv = create_array(st, current_vol);

@@ -4871,10 +4919,9 @@ static int store_imsm_mpb(int fd, struct imsm_super *mpb)
static void imsm_sync_metadata(struct supertype *container)
{
struct intel_super *super = container->sb;
-
+ dprintf("sync metadata: %d\n", super->updates_pending);
if (!super->updates_pending)
return;
-
write_super_imsm(super, 0);

super->updates_pending = 0;
@@ -5163,8 +5210,90 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_
return 0;
}

+
+static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
+{
+ struct dl *dl = NULL;
+ for (dl = super->disks; dl; dl = dl->next)
+ if ((dl->major == major) && (dl->minor == minor))
+ return dl;
+ return NULL;
+}
+
+static int remove_disk_super(struct intel_super *super, int major, int minor)
+{
+ struct dl *prev = NULL;
+ struct dl *dl;
+
+ prev = NULL;
+ for (dl = super->disks; dl; dl = dl->next) {
+ if ((dl->major == major) && (dl->minor == minor)) {
+ /* remove */
+ if (prev)
+ prev->next = dl->next;
+ else
+ super->disks = dl->next;
+ dl->next = NULL;
+ __free_imsm_disk(dl);
+ dprintf("%s: removed %x:%x\n",
+ __func__, major, minor);
+ break;
+ }
+ prev = dl;
+ }
+ return 0;
+}
+
static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);

+static int add_remove_disk_update(struct intel_super *super)
+{
+ int check_degraded = 0;
+ struct dl *disk = NULL;
+ /* add/remove some spares to/from the metadata/contrainer */
+ while (super->disk_mgmt_list) {
+ struct dl *disk_cfg;
+
+ disk_cfg = super->disk_mgmt_list;
+ super->disk_mgmt_list = disk_cfg->next;
+ disk_cfg->next = NULL;
+
+ if (disk_cfg->action == DISK_ADD) {
+ disk_cfg->next = super->disks;
+ super->disks = disk_cfg;
+ check_degraded = 1;
+ dprintf("%s: added %x:%x\n",
+ __func__, disk_cfg->major,
+ disk_cfg->minor);
+ } else if (disk_cfg->action == DISK_REMOVE) {
+ dprintf("Disk remove action processed: %x.%x\n",
+ disk_cfg->major, disk_cfg->minor);
+ disk = get_disk_super(super,
+ disk_cfg->major,
+ disk_cfg->minor);
+ if (disk) {
+ /* store action status */
+ disk->action = DISK_REMOVE;
+ /* remove spare disks only */
+ if (disk->index == -1) {
+ remove_disk_super(super,
+ disk_cfg->major,
+ disk_cfg->minor);
+ }
+ }
+ /* remove spare disks only */
+ if (disk->index == -1) {
+ remove_disk_super(super,
+ disk_cfg->major,
+ disk_cfg->minor);
+ }
+ /* release allocate disk structure */
+ __free_imsm_disk(disk_cfg);
+ }
+ }
+ return check_degraded;
+}
+
static void imsm_process_update(struct supertype *st,
struct metadata_update *update)
{
@@ -5474,31 +5603,24 @@ static void imsm_process_update(struct supertype *st,
super->updates_pending++;
break;
}
- case update_add_disk:
-
+ case update_add_remove_disk: {
/* we may be able to repair some arrays if disks are
- * being added */
- if (super->add) {
+ * being added, check teh status of add_remove_disk
+ * if discs has been added.
+ */
+ if (add_remove_disk_update(super)) {
struct active_array *a;

super->updates_pending++;
- for (a = st->arrays; a; a = a->next)
+ for (a = st->arrays; a; a = a->next)
a->check_degraded = 1;
}
- /* add some spares to the metadata */
- while (super->add) {
- struct dl *al;
-
- al = super->add;
- super->add = al->next;
- al->next = super->disks;
- super->disks = al;
- dprintf("%s: added %x:%x\n",
- __func__, al->major, al->minor);
- }
-
break;
}
+ default:
+ fprintf(stderr, "error: unsuported process update type:"
+ "(type: %d)\n", type);
+ }
}

static void imsm_prepare_update(struct supertype *st,
@@ -5684,6 +5806,7 @@ struct superswitch super_imsm = {
.validate_geometry = validate_geometry_imsm,
.default_chunk = default_chunk_imsm,
.add_to_super = add_to_super_imsm,
+ .remove_from_super = remove_from_super_imsm,
.detail_platform = detail_platform_imsm,
.kill_subarray = kill_subarray_imsm,
.update_subarray = update_subarray_imsm,
--
1.6.4.2
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removeddisk from in imsm container.

am 08.12.2010 03:29:42 von NeilBrown

On Tue, 7 Dec 2010 16:07:35 +0000 "Labun, Marcin"
wrote:

> >From 4bd19fb7b8a4258bf6cf34288be635bdb9af3dbe Mon Sep 17 00:00:00 2001
> From: Marcin Labun
> Date: Wed, 30 Nov 2010 03:55:18 +0100
> Subject: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removed disk from in imsm container.
>
> Manager thread shall pass the information to monitor thread (mdmon)
> that some devices are removed from container. Otherwise, monitor (mdmon)
> might use such devices (spares) to rebuild the array that has gone degraded.
>
> This problem happens for imsm containers, since a list of the container disks
> is maintained in intel_super structure. When array goes degraded, the list is
> searched to find a spare disks to start rebuild.
> Without this fix the rebuild could be stared on the spare device that was
> a member of the container, but has been removed from it.
>
> New super type function handler has been introduced to prepare metadata
> format specific information about removed devices.
> int (*remove_from_super)(struct supertype *st, mdu_disk_info_t *dinfo,
> int fd);
> The message prepared in remove_from_super is later processed
> by proceess_update handler in monitor thread.

I don't like this. There is unnecessary complexity.

adding a disk and removing a disk are very different sorts of operations.
When adding a disk, you need to pass extra information about how the disk
might be used - whether it is already part of the array, or if it is a fresh
spare or whatever.
When removing a device there is none of that. Just remove the device.

So when mdadm removes a device from a container it should
- get a lock so mdmon won't assign the device as spare
- check that the device is still a spare
- remove the device from the container
- unlock
- ping mdmon

mdmon should notice that the device has gone and should update the metadata
accordingly.

So you may still need a 'remove_from_super' method, but it will not send a
metadata update request to mdmon.
Rather it will be run by mdmon when it notices the device is gone.

It is probably appropriate to pass an mdu_disk_info_t or maybe just a device
number. I don't think there is any need to pass an 'fd'.

Does that approach seem OK to you?

Thanks,
NeilBrown


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

RE: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removeddisk from in imsm container.

am 08.12.2010 18:17:10 von Marcin.Labun

> -----Original Message-----
> From: Neil Brown [mailto:neilb@suse.de]
> Sent: Wednesday, December 08, 2010 3:30 AM
> To: Labun, Marcin
> Cc: linux-raid@vger.kernel.org; Neubauer, Wojciech; Williams, Dan J;
> Ciechanowski, Ed; Hawrylewicz Czarnowski, Przemyslaw; Czarnowska, Anna
> Subject: Re: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using
> removed disk from in imsm container.
>



> > int (*remove_from_super)(struct supertype *st, mdu_disk_info_t
> *dinfo,
> > int fd);
> > The message prepared in remove_from_super is later processed
> > by proceess_update handler in monitor thread.
>
> I don't like this. There is unnecessary complexity.
>
> adding a disk and removing a disk are very different sorts of
> operations.
> When adding a disk, you need to pass extra information about how the
> disk
> might be used - whether it is already part of the array, or if it is a
> fresh
> spare or whatever.
> When removing a device there is none of that. Just remove the device.
>
> So when mdadm removes a device from a container it should
> - get a lock so mdmon won't assign the device as spare
> - check that the device is still a spare
> - remove the device from the container
> - unlock
> - ping mdmon
>
> mdmon should notice that the device has gone and should update the
> metadata
> accordingly.

Let me add a couple of comments and questions, so I can understand correctly what solution you want to get.

My changes are in manager and monitor threads of mdmon.

As you said, manager notices that a device has gone.
So I added a call to new remove_disk_from_container in manage_container function (in manager thread).
remove_disk_from_container is called for every device that has disappeared.
The function remove_disk_from_container prepares an metadata update using handler functions and queues them using queue_metadata_update
And that's all for manager thread.

Now monitor thread of mdmon checks for metadata updates in update_queue global.
Monitor walks update__queue list to call process_update handler.
In IMSM process_update handler, I added code that removes spare disk descriptor from IMSM device list.

Now my questions:
- do you want to get rid of update_queue communication between manager and monitor for disk removal?

- do you want to add non-metadata specific communication between manager and monitor? This would introduce generic disk_remove message
that is passed from manager to monitor. The monitor would call metadata handler.

- do you want the monitor thread to find out what disks has been removed and call metadata handlers?

>
> So you may still need a 'remove_from_super' method, but it will not
> send a
> metadata update request to mdmon.
> Rather it will be run by mdmon when it notices the device is gone.
>
> It is probably appropriate to pass an mdu_disk_info_t or maybe just a
> device
> number. I don't think there is any need to pass an 'fd'.

That's correct: fd is not needed.

Thanks in advance,
Marcin

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using removeddisk from in imsm container.

am 08.12.2010 22:37:24 von NeilBrown

On Wed, 8 Dec 2010 17:17:10 +0000 "Labun, Marcin"
wrote:

> > -----Original Message-----
> > From: Neil Brown [mailto:neilb@suse.de]
> > Sent: Wednesday, December 08, 2010 3:30 AM
> > To: Labun, Marcin
> > Cc: linux-raid@vger.kernel.org; Neubauer, Wojciech; Williams, Dan J;
> > Ciechanowski, Ed; Hawrylewicz Czarnowski, Przemyslaw; Czarnowska, Anna
> > Subject: Re: [PATCH 1/2] IMSM: Fix problem in mdmon monitor of using
> > removed disk from in imsm container.
> >
>
>
>
> > > int (*remove_from_super)(struct supertype *st, mdu_disk_info_t
> > *dinfo,
> > > int fd);
> > > The message prepared in remove_from_super is later processed
> > > by proceess_update handler in monitor thread.
> >
> > I don't like this. There is unnecessary complexity.
> >
> > adding a disk and removing a disk are very different sorts of
> > operations.
> > When adding a disk, you need to pass extra information about how the
> > disk
> > might be used - whether it is already part of the array, or if it is a
> > fresh
> > spare or whatever.
> > When removing a device there is none of that. Just remove the device.
> >
> > So when mdadm removes a device from a container it should
> > - get a lock so mdmon won't assign the device as spare
> > - check that the device is still a spare
> > - remove the device from the container
> > - unlock
> > - ping mdmon
> >
> > mdmon should notice that the device has gone and should update the
> > metadata
> > accordingly.
>
> Let me add a couple of comments and questions, so I can understand correctly what solution you want to get.
>
> My changes are in manager and monitor threads of mdmon.
>
> As you said, manager notices that a device has gone.
> So I added a call to new remove_disk_from_container in manage_container function (in manager thread).
> remove_disk_from_container is called for every device that has disappeared.
> The function remove_disk_from_container prepares an metadata update using handler functions and queues them using queue_metadata_update
> And that's all for manager thread.
>
> Now monitor thread of mdmon checks for metadata updates in update_queue global.
> Monitor walks update__queue list to call process_update handler.
> In IMSM process_update handler, I added code that removes spare disk descriptor from IMSM device list.
>
> Now my questions:
> - do you want to get rid of update_queue communication between manager and monitor for disk removal?
>
> - do you want to add non-metadata specific communication between manager and monitor? This would introduce generic disk_remove message
> that is passed from manager to monitor. The monitor would call metadata handler.
>
> - do you want the monitor thread to find out what disks has been removed and call metadata handlers?

It looks like I misunderstood your patch. I got the impression that it was
sending a metadata update from mdadm to mdmon, but from what you say it isn't
doing that.

If it is just being sent from manager to monitor then that is obviously fine.

I'll have another look at the code now that I have a better understanding of
what you intended - maybe it will be fine as it is (except for the extraneous
'fd' argument...)

Thanks,
NeilBrown


>
> >
> > So you may still need a 'remove_from_super' method, but it will not
> > send a
> > metadata update request to mdmon.
> > Rather it will be run by mdmon when it notices the device is gone.
> >
> > It is probably appropriate to pass an mdu_disk_info_t or maybe just a
> > device
> > number. I don't think there is any need to pass an 'fd'.
>
> That's correct: fd is not needed.
>
> Thanks in advance,
> Marcin
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

spare-same-slot question

am 09.12.2010 15:10:21 von anna.czarnowska

Hi,
Here is my observation:
Imagine that we have an array that went degraded leaving a cookie (appropriate config is there).
When we plug in a disk without metadata into the same slot that was used by this array
mdadm should add the new disk to that array. This is the way it goes at the moment.

What should happen when we connect a disk with spare metadata to this slot?

At the moment it is added to the original array or (imsm) anywhere.
Wouldn't it be reasonable to use the slot information when we have a spare?

In some cases Monitor will move it anyway but there is a chance that
another array may get it before the "owner" of this slot.
Or that Monitor may not move it as domain of the array doesn't include
domain of this slot any more.
We agreed that domain shouldn't be checked when there is a cookie but this only applies to Incremental.
(It will be implemented shortly.)

"spare-same-slot" sounds to me like a spare in the same slot should also go to the array described by cookie.
What is your opinion?

Anna
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html