[PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
On 03.09.2010 12:29:38 by Tejun Heo
This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
now deprecated REQ_HARDBARRIER.
* -EOPNOTSUPP handling logic dropped.
* Preflush is handled as before but postflush is dropped and replaced
with passing down REQ_FUA to member request_queues. This replaces
one array wide cache flush w/ member specific FUA writes.
* __split_and_process_bio() now calls __clone_and_map_flush() directly
for flushes and guarantees all FLUSH bio's going to targets are zero
length.
* It's now guaranteed that all FLUSH bio's which are passed onto dm
targets are zero length. bio_empty_barrier() tests are replaced
with REQ_FLUSH tests.
* Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
* Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
enough to be marked with unlikely().
* Block layer now filters out REQ_FLUSH/FUA bio's if the request_queue
doesn't support cache flushing. Advertise REQ_FLUSH | REQ_FUA
capability.
* Request based dm isn't converted yet. dm_init_request_based_queue()
resets flush support to 0 for now. To avoid disturbing request
based dm code, dm->flush_error is added for bio based dm while
request based dm continues to use dm->barrier_error.
Lightly tested linear, stripe, raid1, snap and crypt targets. Please
proceed with caution as I'm not familiar with the code base.
Signed-off-by: Tejun Heo
Cc: dm-devel@redhat.com
Cc: Christoph Hellwig
---
drivers/md/dm-crypt.c | 2 +-
drivers/md/dm-io.c | 20 +-----
drivers/md/dm-log.c | 2 +-
drivers/md/dm-raid1.c | 8 +-
drivers/md/dm-region-hash.c | 16 +++---
drivers/md/dm-snap-persistent.c | 2 +-
drivers/md/dm-snap.c | 6 +-
drivers/md/dm-stripe.c | 2 +-
drivers/md/dm.c | 119 +++++++++++++++++++--------------------
9 files changed, 80 insertions(+), 97 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 368e8e9..d5b0e4c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
struct dm_crypt_io *io;
struct crypt_config *cc;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
cc = ti->private;
bio->bi_bdev = cc->dev->bdev;
return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0590c75..136d4f7 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -31,7 +31,6 @@ struct dm_io_client {
*/
struct io {
unsigned long error_bits;
- unsigned long eopnotsupp_bits;
atomic_t count;
struct task_struct *sleeper;
struct dm_io_client *client;
@@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
*---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
- if (error) {
+ if (error)
set_bit(region, &io->error_bits);
- if (error == -EOPNOTSUPP)
- set_bit(region, &io->eopnotsupp_bits);
- }
if (atomic_dec_and_test(&io->count)) {
if (io->sleeper)
@@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
sector_t remaining = where->count;
/*
- * where->count may be zero if rw holds a write barrier and we
- * need to send a zero-sized barrier.
+ * where->count may be zero if rw holds a flush and we need to
+ * send a zero-sized flush.
*/
do {
/*
@@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
*/
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
- if (where[i].count || (rw & REQ_HARDBARRIER))
+ if (where[i].count || (rw & REQ_FLUSH))
do_region(rw, i, where + i, dp, io);
}
@@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
return -EIO;
}
-retry:
io->error_bits = 0;
- io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = current;
io->client = client;
@@ -412,11 +406,6 @@ retry:
}
set_current_state(TASK_RUNNING);
- if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
- rw &= ~REQ_HARDBARRIER;
- goto retry;
- }
-
if (error_bits)
*error_bits = io->error_bits;
@@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
io = mempool_alloc(client->pool, GFP_NOIO);
io->error_bits = 0;
- io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
io->client = client;
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 5a08be0..33420e6 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
.count = 0,
};
- lc->io_req.bi_rw = WRITE_BARRIER;
+ lc->io_req.bi_rw = WRITE_FLUSH;
return dm_io(&lc->io_req, 1, &null_location, NULL);
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7c081bc..19a59b0 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
struct dm_io_region io[ms->nr_mirrors];
struct mirror *m;
struct dm_io_request io_req = {
- .bi_rw = WRITE_BARRIER,
+ .bi_rw = WRITE_FLUSH,
.mem.type = DM_IO_KMEM,
.mem.ptr.bvec = NULL,
.client = ms->io_client,
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
struct dm_io_region io[ms->nr_mirrors], *dest = io;
struct mirror *m;
struct dm_io_request io_req = {
- .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+ .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
.mem.type = DM_IO_BVEC,
.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
.notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio_list_add(&sync, bio);
continue;
}
@@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
* We need to dec pending if this was a write.
*/
if (rw == WRITE) {
- if (likely(!bio_empty_barrier(bio)))
+ if (!(bio->bi_rw & REQ_FLUSH))
dm_rh_dec(ms->rh, map_context->ll);
return error;
}
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index bd5c58b..dad011a 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -81,9 +81,9 @@ struct dm_region_hash {
struct list_head failed_recovered_regions;
/*
- * If there was a barrier failure no regions can be marked clean.
+ * If there was a flush failure no regions can be marked clean.
*/
- int barrier_failure;
+ int flush_failure;
void *context;
sector_t target_begin;
@@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
INIT_LIST_HEAD(&rh->quiesced_regions);
INIT_LIST_HEAD(&rh->recovered_regions);
INIT_LIST_HEAD(&rh->failed_recovered_regions);
- rh->barrier_failure = 0;
+ rh->flush_failure = 0;
rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
sizeof(struct dm_region));
@@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
region_t region = dm_rh_bio_to_region(rh, bio);
int recovering = 0;
- if (bio_empty_barrier(bio)) {
- rh->barrier_failure = 1;
+ if (bio->bi_rw & REQ_FLUSH) {
+ rh->flush_failure = 1;
return;
}
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
struct bio *bio;
for (bio = bios->head; bio; bio = bio->bi_next) {
- if (bio_empty_barrier(bio))
+ if (bio->bi_rw & REQ_FLUSH)
continue;
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
@@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
*/
/* do nothing for DM_RH_NOSYNC */
- if (unlikely(rh->barrier_failure)) {
+ if (unlikely(rh->flush_failure)) {
/*
- * If a write barrier failed some time ago, we
+ * If a write flush failed some time ago, we
* don't know whether or not this write made it
* to the disk, so we must resync the device.
*/
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index cc2bdb8..0b61792 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
/*
* Commit exceptions to disk.
*/
- if (ps->valid && area_io(ps, WRITE_BARRIER))
+ if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
ps->valid = 0;
/*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5974d30..eed2101 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1587,7 +1587,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio->bi_bdev = s->cow->bdev;
return DM_MAPIO_REMAPPED;
}
@@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
if (!map_context->target_request_nr)
bio->bi_bdev = s->origin->bdev;
else
@@ -2135,7 +2135,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
return DM_MAPIO_REMAPPED;
/* Only tell snapshots if this is a write */
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c297f6d..f0371b4 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
uint32_t stripe;
unsigned target_request_nr;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
target_request_nr = map_context->target_request_nr;
BUG_ON(target_request_nr >= sc->stripes);
bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b1d92be..32e6622 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -144,15 +144,16 @@ struct mapped_device {
spinlock_t deferred_lock;
/*
- * An error from the barrier request currently being processed.
+ * An error from the flush request currently being processed.
*/
- int barrier_error;
+ int flush_error;
/*
* Protect barrier_error from concurrent endio processing
* in request-based dm.
*/
spinlock_t barrier_error_lock;
+ int barrier_error;
/*
* Processing queue (flush/barriers)
@@ -200,8 +201,8 @@ struct mapped_device {
/* sysfs handle */
struct kobject kobj;
- /* zero-length barrier that will be cloned and submitted to targets */
- struct bio barrier_bio;
+ /* zero-length flush that will be cloned and submitted to targets */
+ struct bio flush_bio;
};
/*
@@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
/*
* After this is decremented the bio must not be touched if it is
- * a barrier.
+ * a flush.
*/
dm_disk(md)->part0.in_flight[rw] = pending =
atomic_dec_return(&md->pending[rw]);
@@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
*/
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md)) {
- if (!(io->bio->bi_rw & REQ_HARDBARRIER))
+ if (!(io->bio->bi_rw & REQ_FLUSH))
bio_list_add_head(&md->deferred,
io->bio);
} else
@@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
io_error = io->error;
bio = io->bio;
- if (bio->bi_rw & REQ_HARDBARRIER) {
+ if (bio->bi_rw & REQ_FLUSH) {
/*
- * There can be just one barrier request so we use
+ * There can be just one flush request so we use
* a per-device variable for error reporting.
* Note that you can't touch the bio after end_io_acct
- *
- * We ignore -EOPNOTSUPP for empty flush reported by
- * underlying devices. We assume that if the device
- * doesn't support empty barriers, it doesn't need
- * cache flushing commands.
*/
- if (!md->barrier_error &&
- !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
- md->barrier_error = io_error;
+ if (!md->flush_error)
+ md->flush_error = io_error;
end_io_acct(io);
free_io(md, io);
} else {
@@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
}
/*
- * Creates a little bio that is just does part of a bvec.
+ * Creates a little bio that just does part of a bvec.
*/
static struct bio *split_bvec(struct bio *bio, sector_t sector,
unsigned short idx, unsigned int offset,
@@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_sector = sector;
clone->bi_bdev = bio->bi_bdev;
- clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
+ clone->bi_rw = bio->bi_rw;
clone->bi_vcnt = 1;
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
@@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
- clone->bi_rw &= ~REQ_HARDBARRIER;
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
@@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
__issue_target_request(ci, ti, request_nr, len);
}
-static int __clone_and_map_empty_barrier(struct clone_info *ci)
+static int __clone_and_map_flush(struct clone_info *ci)
{
unsigned target_nr = 0;
struct dm_target *ti;
@@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
sector_t len = 0, max;
struct dm_target_io *tio;
- if (unlikely(bio_empty_barrier(bio)))
- return __clone_and_map_empty_barrier(ci);
-
if (unlikely(bio->bi_rw & REQ_DISCARD))
return __clone_and_map_discard(ci);
@@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.map = dm_get_live_table(md);
if (unlikely(!ci.map)) {
- if (!(bio->bi_rw & REQ_HARDBARRIER))
+ if (!(bio->bi_rw & REQ_FLUSH))
bio_io_error(bio);
else
- if (!md->barrier_error)
- md->barrier_error = -EIO;
+ if (!md->flush_error)
+ md->flush_error = -EIO;
return;
}
@@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.io->md = md;
spin_lock_init(&ci.io->endio_lock);
ci.sector = bio->bi_sector;
- ci.sector_count = bio_sectors(bio);
- if (unlikely(bio_empty_barrier(bio)))
+ if (!(bio->bi_rw & REQ_FLUSH))
+ ci.sector_count = bio_sectors(bio);
+ else {
+ /* all FLUSH bio's reaching here should be empty */
+ WARN_ON_ONCE(bio_has_data(bio));
ci.sector_count = 1;
+ }
ci.idx = bio->bi_idx;
start_io_acct(ci.io);
- while (ci.sector_count && !error)
- error = __clone_and_map(&ci);
+ while (ci.sector_count && !error) {
+ if (!(bio->bi_rw & REQ_FLUSH))
+ error = __clone_and_map(&ci);
+ else
+ error = __clone_and_map_flush(&ci);
+ }
/* drop the extra reference count */
dec_pending(ci.io, error);
@@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
part_stat_unlock();
/*
- * If we're suspended or the thread is processing barriers
+ * If we're suspended or the thread is processing flushes
* we have to queue this io for later.
*/
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
- unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+ (bio->bi_rw & REQ_FLUSH)) {
up_read(&md->io_lock);
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+ blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
}
/*
@@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
blk_queue_lld_busy(md->queue, dm_lld_busy);
- blk_queue_flush(md->queue, REQ_FLUSH);
+ /* no flush support for request based dm yet */
+ blk_queue_flush(md->queue, 0);
elv_register_queue(md->queue);
@@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
return r;
}
-static void dm_flush(struct mapped_device *md)
+static void process_flush(struct mapped_device *md, struct bio *bio)
{
- dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-
- bio_init(&md->barrier_bio);
- md->barrier_bio.bi_bdev = md->bdev;
- md->barrier_bio.bi_rw = WRITE_BARRIER;
- __split_and_process_bio(md, &md->barrier_bio);
+ md->flush_error = 0;
+ /* handle REQ_FLUSH */
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
-}
-static void process_barrier(struct mapped_device *md, struct bio *bio)
-{
- md->barrier_error = 0;
+ bio_init(&md->flush_bio);
+ md->flush_bio.bi_bdev = md->bdev;
+ md->flush_bio.bi_rw = WRITE_FLUSH;
+ __split_and_process_bio(md, &md->flush_bio);
- dm_flush(md);
+ dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
- if (!bio_empty_barrier(bio)) {
- __split_and_process_bio(md, bio);
- /*
- * If the request isn't supported, don't waste time with
- * the second flush.
- */
- if (md->barrier_error != -EOPNOTSUPP)
- dm_flush(md);
+ /* if it's an empty flush or the preflush failed, we're done */
+ if (!bio_has_data(bio) || md->flush_error) {
+ if (md->flush_error != DM_ENDIO_REQUEUE)
+ bio_endio(bio, md->flush_error);
+ else {
+ spin_lock_irq(&md->deferred_lock);
+ bio_list_add_head(&md->deferred, bio);
+ spin_unlock_irq(&md->deferred_lock);
+ }
+ return;
}
- if (md->barrier_error != DM_ENDIO_REQUEUE)
- bio_endio(bio, md->barrier_error);
- else {
- spin_lock_irq(&md->deferred_lock);
- bio_list_add_head(&md->deferred, bio);
- spin_unlock_irq(&md->deferred_lock);
- }
+ /* issue data + REQ_FUA */
+ bio->bi_rw &= ~REQ_FLUSH;
+ __split_and_process_bio(md, bio);
}
/*
@@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
if (dm_request_based(md))
generic_make_request(c);
else {
- if (c->bi_rw & REQ_HARDBARRIER)
- process_barrier(md, c);
+ if (c->bi_rw & REQ_FLUSH)
+ process_flush(md, c);
else
__split_and_process_bio(md, c);
}
--
1.7.1
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
On 03.09.2010 14:36:18 by Mike Snitzer
On Fri, Sep 03 2010 at 6:29am -0400,
Tejun Heo wrote:
> This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
> now deprecated REQ_HARDBARRIER.
>
> * -EOPNOTSUPP handling logic dropped.
>
> * Preflush is handled as before but postflush is dropped and replaced
> with passing down REQ_FUA to member request_queues. This replaces
> one array wide cache flush w/ member specific FUA writes.
>
> * __split_and_process_bio() now calls __clone_and_map_flush() directly
> for flushes and guarantees all FLUSH bio's going to targets are zero
> length.
>
> * It's now guaranteed that all FLUSH bio's which are passed onto dm
> targets are zero length. bio_empty_barrier() tests are replaced
> with REQ_FLUSH tests.
>
> * Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
>
> * Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
> enough to be marked with unlikely().
>
> * Block layer now filters out REQ_FLUSH/FUA bio's if the request_queue
> doesn't support cache flushing. Advertise REQ_FLUSH | REQ_FUA
> capability.
>
> * Request based dm isn't converted yet. dm_init_request_based_queue()
> resets flush support to 0 for now. To avoid disturbing request
> based dm code, dm->flush_error is added for bio based dm while
> requested based dm continues to use dm->barrier_error.
>
> Lightly tested linear, stripe, raid1, snap and crypt targets. Please
> proceed with caution as I'm not familiar with the code base.
Reviewed-by: Mike Snitzer
Mikulas and/or Alasdair won't be able to review this until next week.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [dm-devel] [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
On 06.09.2010 13:14:32 by Milan Broz
On 09/03/2010 12:29 PM, Tejun Heo wrote:
> +++ b/drivers/md/dm-crypt.c
> @@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
> struct dm_crypt_io *io;
> struct crypt_config *cc;
>
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> cc = ti->private;
> bio->bi_bdev = cc->dev->bdev;
> return DM_MAPIO_REMAPPED;
....
> +++ b/drivers/md/dm.c
> @@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> ci.io->md = md;
> spin_lock_init(&ci.io->endio_lock);
> ci.sector = bio->bi_sector;
> - ci.sector_count = bio_sectors(bio);
> - if (unlikely(bio_empty_barrier(bio)))
> + if (!(bio->bi_rw & REQ_FLUSH))
> + ci.sector_count = bio_sectors(bio);
> + else {
> + /* all FLUSH bio's reaching here should be empty */
> + WARN_ON_ONCE(bio_has_data(bio));
> ci.sector_count = 1;
> + }
I would add a BUG_ON(bio_has_data(bio)) either to the dm-crypt target or directly to the DM core
in this path.
Note that empty barrier requests now bypass the encryption layer in dm-crypt, so if a bio
with a data payload reaches it after this change, it causes data corruption
(moreover, plain data reaches the disk directly).
Milan
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
On 07.09.2010 23:17:24 by Mike Snitzer
On Mon, Sep 06 2010 at 7:14am -0400,
Milan Broz wrote:
> On 09/03/2010 12:29 PM, Tejun Heo wrote:
>
> > +++ b/drivers/md/dm-crypt.c
> > @@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
> > struct dm_crypt_io *io;
> > struct crypt_config *cc;
> >
> > - if (unlikely(bio_empty_barrier(bio))) {
> > + if (bio->bi_rw & REQ_FLUSH) {
> > cc = ti->private;
> > bio->bi_bdev = cc->dev->bdev;
> > return DM_MAPIO_REMAPPED;
>
> ...
>
> > +++ b/drivers/md/dm.c
> > @@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> > ci.io->md = md;
> > spin_lock_init(&ci.io->endio_lock);
> > ci.sector = bio->bi_sector;
> > - ci.sector_count = bio_sectors(bio);
> > - if (unlikely(bio_empty_barrier(bio)))
> > + if (!(bio->bi_rw & REQ_FLUSH))
> > + ci.sector_count = bio_sectors(bio);
> > + else {
> > + /* all FLUSH bio's reaching here should be empty */
> > + WARN_ON_ONCE(bio_has_data(bio));
> > ci.sector_count = 1;
> > + }
>
>
> I would add BUG_ON(bio_has_data(bio)) either to dm-crypt target or directly to DM core
> in this path.
I agree that the WARN_ON_ONCE should be changed to a BUG_ON. This is a
guarantee that the block layer now provides so it seems correct to have
the DM core bug if that guarantee isn't actually provided.
> Note that empty barrier request bypass encryption layer now in dm-crypt, so if some bio
> with data payload reach it after the change, it causes data corruption
> (moreover plain data reach the disk directly).
Given the consequences, it wouldn't hurt to BUG_ON() in dm-crypt too.
It's redundant if the DM core will also BUG_ON() but it serves as a
dm-crypt safety-net/documentation.
Mike
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
On 08.09.2010 00:15:43 by Mike Snitzer
On Tue, Sep 07 2010 at 5:17pm -0400,
Mike Snitzer wrote:
> On Mon, Sep 06 2010 at 7:14am -0400,
> Milan Broz wrote:
>
> > On 09/03/2010 12:29 PM, Tejun Heo wrote:
> >
> > > +++ b/drivers/md/dm-crypt.c
> > > @@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
> > > struct dm_crypt_io *io;
> > > struct crypt_config *cc;
> > >
> > > - if (unlikely(bio_empty_barrier(bio))) {
> > > + if (bio->bi_rw & REQ_FLUSH) {
> > > cc = ti->private;
> > > bio->bi_bdev = cc->dev->bdev;
> > > return DM_MAPIO_REMAPPED;
> >
> > ...
> >
> > > +++ b/drivers/md/dm.c
> > > @@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> > > ci.io->md = md;
> > > spin_lock_init(&ci.io->endio_lock);
> > > ci.sector = bio->bi_sector;
> > > - ci.sector_count = bio_sectors(bio);
> > > - if (unlikely(bio_empty_barrier(bio)))
> > > + if (!(bio->bi_rw & REQ_FLUSH))
> > > + ci.sector_count = bio_sectors(bio);
> > > + else {
> > > + /* all FLUSH bio's reaching here should be empty */
> > > + WARN_ON_ONCE(bio_has_data(bio));
> > > ci.sector_count = 1;
> > > + }
> >
> >
> > I would add BUG_ON(bio_has_data(bio)) either to dm-crypt target or directly to DM core
> > in this path.
>
> I agree, that WARN_ON_ONCE should be changed to BUG_ON. This is a
> guarantee that the block layer now provides so it seems correct to have
> the DM core bug if that guarantee isn't actually provided.
I was mistaken, DM enforces that guarantee... ;)
(but block layer will also enforce empty flush for request-based)
But it wasn't clear until Christoph and I looked closer. The point
stands though; we should BUG_ON rather than WARN_ON_ONCE.
I'll send a follow-on patch to help clean this code up a bit more (based
on Christoph's suggestions). Mainly just making the flush paths a bit
more distinct and adding some comments.
Mike
[PATCH 42/41] dm: convey that all flushes are processed as empty
On 08.09.2010 01:49:18 by Mike Snitzer
Rename __clone_and_map_flush to __clone_and_map_empty_flush for added
clarity.
Reintroduce a BUG_ON() and add a few more helpful comments to the code
so that it is clear that all flushes are empty.
Cleanup __split_and_process_bio() so that an empty flush isn't processed
by a 'sector_count' focused while loop.
Signed-off-by: Mike Snitzer
---
drivers/md/dm.c | 21 +++++++++------------
1 files changed, 9 insertions(+), 12 deletions(-)
Tejun, please feel free to fold this patch into (or insert after)
0025-dm-relax-ordering-of-bio-based-flush-implementation.patch
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index cd2f7e7..9a852ee 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -622,6 +622,7 @@ static void dec_pending(struct dm_io *io, int error)
return;
if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
+ /* done with normal IO or empty flush */
trace_block_bio_complete(md->queue, bio);
bio_endio(bio, io_error);
} else {
@@ -1132,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
__issue_target_request(ci, ti, request_nr, len);
}
-static int __clone_and_map_flush(struct clone_info *ci)
+static int __clone_and_map_empty_flush(struct clone_info *ci)
{
unsigned target_nr = 0;
struct dm_target *ti;
+ BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
- ci->sector_count = 0;
-
return 0;
}
@@ -1302,20 +1302,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.sector = bio->bi_sector;
ci.idx = bio->bi_idx;
+ start_io_acct(ci.io);
if (!is_flush) {
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
+ while (ci.sector_count && !error)
+ error = __clone_and_map(&ci);
} else {
ci.bio = &ci.md->flush_bio;
- ci.sector_count = 1;
- }
-
- start_io_acct(ci.io);
- while (ci.sector_count && !error) {
- if (!is_flush)
- error = __clone_and_map(&ci);
- else
- error = __clone_and_map_flush(&ci);
+ ci.sector_count = 0;
+ error = __clone_and_map_empty_flush(&ci);
+ /* dec_pending submits any data associated with flush */
}
/* drop the extra reference count */
Re: [PATCH 42/41] dm: convey that all flushes are processed as empty
On 08.09.2010 02:00:01 by Christoph Hellwig
On Tue, Sep 07, 2010 at 07:49:18PM -0400, Mike Snitzer wrote:
> if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
> + /* done with normal IO or empty flush */
> trace_block_bio_complete(md->queue, bio);
> bio_endio(bio, io_error);
> } else {
To clarify this further I'd reorder the checks:
if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
/*
* Preflush done, ...
*/
} else {
}
> @@ -1302,20 +1302,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> ci.sector = bio->bi_sector;
> ci.idx = bio->bi_idx;
>
> + start_io_acct(ci.io);
> if (!is_flush) {
no need for the is_flush anymore now that it's only used once. Again,
I think avoiding negatives in if statements without a reason usually
makes the code a bit more clear.
[PATCH 42/41 v2] dm: convey that all flushes are processed as empty
On 08.09.2010 04:04:30 by Mike Snitzer
Rename __clone_and_map_flush to __clone_and_map_empty_flush for added
clarity.
Simplify logic associated with REQ_FLUSH conditionals.
Introduce a BUG_ON() and add a few more helpful comments to the code
so that it is clear that all flushes are empty.
Cleanup __split_and_process_bio() so that an empty flush isn't processed
by a 'sector_count' focused while loop.
Signed-off-by: Mike Snitzer
---
drivers/md/dm.c | 34 +++++++++++++++-------------------
1 files changed, 15 insertions(+), 19 deletions(-)
Tejun, please feel free to fold this patch into (or insert after)
0025-dm-relax-ordering-of-bio-based-flush-implementation.patch
v2: Simplify logic associated with REQ_FLUSH conditionals
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index cd2f7e7..f934e98 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -621,16 +621,17 @@ static void dec_pending(struct dm_io *io, int error)
if (io_error == DM_ENDIO_REQUEUE)
return;
- if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
- trace_block_bio_complete(md->queue, bio);
- bio_endio(bio, io_error);
- } else {
+ if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
/*
* Preflush done for flush with data, reissue
* without REQ_FLUSH.
*/
bio->bi_rw &= ~REQ_FLUSH;
queue_io(md, bio);
+ } else {
+ /* done with normal IO or empty flush */
+ trace_block_bio_complete(md->queue, bio);
+ bio_endio(bio, io_error);
}
}
}
@@ -1132,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
__issue_target_request(ci, ti, request_nr, len);
}
-static int __clone_and_map_flush(struct clone_info *ci)
+static int __clone_and_map_empty_flush(struct clone_info *ci)
{
unsigned target_nr = 0;
struct dm_target *ti;
+ BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
- ci->sector_count = 0;
-
return 0;
}
@@ -1282,7 +1282,6 @@ static int __clone_and_map(struct clone_info *ci)
*/
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
- bool is_flush = bio->bi_rw & REQ_FLUSH;
struct clone_info ci;
int error = 0;
@@ -1302,20 +1301,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.sector = bio->bi_sector;
ci.idx = bio->bi_idx;
- if (!is_flush) {
+ start_io_acct(ci.io);
+ if (bio->bi_rw & REQ_FLUSH) {
+ ci.bio = &ci.md->flush_bio;
+ ci.sector_count = 0;
+ error = __clone_and_map_empty_flush(&ci);
+ /* dec_pending submits any data associated with flush */
+ } else {
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
- } else {
- ci.bio = &ci.md->flush_bio;
- ci.sector_count = 1;
- }
-
- start_io_acct(ci.io);
- while (ci.sector_count && !error) {
- if (!is_flush)
+ while (ci.sector_count && !error)
error = __clone_and_map(&ci);
- else
- error = __clone_and_map_flush(&ci);
}
/* drop the extra reference count */
Re: [PATCH 42/41 v2] dm: convey that all flushes are processed as empty
On 08.09.2010 18:09:00 by Tejun Heo
On 09/08/2010 04:04 AM, Mike Snitzer wrote:
> Rename __clone_and_map_flush to __clone_and_map_empty_flush for added
> clarity.
>
> Simplify logic associated with REQ_FLUSH conditionals.
>
> Introduce a BUG_ON() and add a few more helpful comments to the code
> so that it is clear that all flushes are empty.
>
> Cleanup __split_and_process_bio() so that an empty flush isn't processed
> by a 'sector_count' focused while loop.
>
> Signed-off-by: Mike Snitzer
> ---
> drivers/md/dm.c | 34 +++++++++++++++-------------------
> 1 files changed, 15 insertions(+), 19 deletions(-)
>
> Tejun, please feel free to fold this patch into (or insert after)
> 0025-dm-relax-ordering-of-bio-based-flush-implementation.pat ch
>
> v2: Simplify logic associated with REQ_FLUSH conditionals
>
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index cd2f7e7..f934e98 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -621,16 +621,17 @@ static void dec_pending(struct dm_io *io, int error)
> if (io_error == DM_ENDIO_REQUEUE)
> return;
>
> - if (!(bio->bi_rw & REQ_FLUSH) || !bio->bi_size) {
> - trace_block_bio_complete(md->queue, bio);
> - bio_endio(bio, io_error);
> - } else {
> + if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
> /*
> * Preflush done for flush with data, reissue
> * without REQ_FLUSH.
> */
> bio->bi_rw &= ~REQ_FLUSH;
> queue_io(md, bio);
> + } else {
> + /* done with normal IO or empty flush */
> + trace_block_bio_complete(md->queue, bio);
> + bio_endio(bio, io_error);
> }
> }
> }
> @@ -1132,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
> __issue_target_request(ci, ti, request_nr, len);
> }
>
> -static int __clone_and_map_flush(struct clone_info *ci)
> +static int __clone_and_map_empty_flush(struct clone_info *ci)
> {
> unsigned target_nr = 0;
> struct dm_target *ti;
>
> + BUG_ON(bio_has_data(ci->bio));
> while ((ti = dm_table_get_target(ci->map, target_nr++)))
> __issue_target_requests(ci, ti, ti->num_flush_requests, 0);
>
> - ci->sector_count = 0;
> -
> return 0;
> }
>
> @@ -1282,7 +1282,6 @@ static int __clone_and_map(struct clone_info *ci)
> */
> static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> {
> - bool is_flush = bio->bi_rw & REQ_FLUSH;
> struct clone_info ci;
> int error = 0;
>
> @@ -1302,20 +1301,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> ci.sector = bio->bi_sector;
> ci.idx = bio->bi_idx;
>
> - if (!is_flush) {
> + start_io_acct(ci.io);
> + if (bio->bi_rw & REQ_FLUSH) {
> + ci.bio = &ci.md->flush_bio;
> + ci.sector_count = 0;
> + error = __clone_and_map_empty_flush(&ci);
> + /* dec_pending submits any data associated with flush */
> + } else {
> ci.bio = bio;
> ci.sector_count = bio_sectors(bio);
> - } else {
> - ci.bio = &ci.md->flush_bio;
> - ci.sector_count = 1;
> - }
> -
> - start_io_acct(ci.io);
> - while (ci.sector_count && !error) {
> - if (!is_flush)
> + while (ci.sector_count && !error)
> error = __clone_and_map(&ci);
> - else
> - error = __clone_and_map_flush(&ci);
> }
>
> /* drop the extra reference count */
--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 42/41 v2] dm: convey that all flushes are processed as empty
on 08.09.2010 18:09:43 by Tejun Heo
On 09/08/2010 04:04 AM, Mike Snitzer wrote:
> Rename __clone_and_map_flush to __clone_and_map_empty_flush for added
> clarity.
>
> Simplify logic associated with REQ_FLUSH conditionals.
>
> Introduce a BUG_ON() and add a few more helpful comments to the code
> so that it is clear that all flushes are empty.
>
> Cleanup __split_and_process_bio() so that an empty flush isn't processed
> by a 'sector_count' focused while loop.
>
> Signed-off-by: Mike Snitzer
> ---
> drivers/md/dm.c | 34 +++++++++++++++-------------------
> 1 files changed, 15 insertions(+), 19 deletions(-)
>
> Tejun, please feel free to fold this patch into (or insert after)
> 0025-dm-relax-ordering-of-bio-based-flush-implementation.patch
I added it after 26 and updated flush-fua branch accordingly.
Thank you.
--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 10.09.2010 20:25:08 by Mikulas Patocka
I quite disagree with the patch. It changes too many things.
What I said and what I want to see:
Take dm code as is. Treat FLUSH requests as empty barriers.
So I want to see a patch that only changes:
bio_empty_barrier(bio) -> bio->bi_rw & REQ_FLUSH
WRITE_BARRIER -> WRITE_FLUSH
etc.
so that the code compiles and works.
DON'T CHANGE ANYTHING ELSE.
Requirements of flushes are a subset of the requirements of barriers, so if
you send a flush and it is treated as a barrier inside DM, there's no problem.
The DM code that I wrote only sends out zero-data barriers and already treats
them as flushes (it doesn't rely on ordering), so there's no problem with
the sent requests either.
Once flushes get into the kernel, I'll clean it up to allow parallel flushes
and requests, etc. But not before. I don't want to work on an interface
that is under development and may be changed.
Mikulas
On Fri, 3 Sep 2010, Tejun Heo wrote:
> This patch converts bio-based dm to support REQ_FLUSH/FUA instead of
> now deprecated REQ_HARDBARRIER.
>
> * -EOPNOTSUPP handling logic dropped.
>
> * Preflush is handled as before but postflush is dropped and replaced
> with passing down REQ_FUA to member request_queues. This replaces
> one array wide cache flush w/ member specific FUA writes.
>
> * __split_and_process_bio() now calls __clone_and_map_flush() directly
> for flushes and guarantees all FLUSH bio's going to targets are zero
>   length.
>
> * It's now guaranteed that all FLUSH bio's which are passed onto dm
> targets are zero length. bio_empty_barrier() tests are replaced
> with REQ_FLUSH tests.
>
> * Empty WRITE_BARRIERs are replaced with WRITE_FLUSHes.
>
> * Dropped unlikely() around REQ_FLUSH tests. Flushes are not unlikely
> enough to be marked with unlikely().
>
> * Block layer now filters out REQ_FLUSH/FUA bio's if the request_queue
> doesn't support cache flushing. Advertise REQ_FLUSH | REQ_FUA
> capability.
>
> * Request based dm isn't converted yet. dm_init_request_based_queue()
> resets flush support to 0 for now. To avoid disturbing request
> based dm code, dm->flush_error is added for bio based dm while
> request based dm continues to use dm->barrier_error.
>
> Lightly tested linear, stripe, raid1, snap and crypt targets. Please
> proceed with caution as I'm not familiar with the code base.
>
> Signed-off-by: Tejun Heo
> Cc: dm-devel@redhat.com
> Cc: Christoph Hellwig
> ---
> drivers/md/dm-crypt.c | 2 +-
> drivers/md/dm-io.c | 20 +-----
> drivers/md/dm-log.c | 2 +-
> drivers/md/dm-raid1.c | 8 +-
> drivers/md/dm-region-hash.c | 16 +++---
> drivers/md/dm-snap-persistent.c | 2 +-
> drivers/md/dm-snap.c | 6 +-
> drivers/md/dm-stripe.c | 2 +-
> drivers/md/dm.c | 119 +++++++++++++++++++--------------------
> 9 files changed, 80 insertions(+), 97 deletions(-)
>
> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> index 368e8e9..d5b0e4c 100644
> --- a/drivers/md/dm-crypt.c
> +++ b/drivers/md/dm-crypt.c
> @@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
> struct dm_crypt_io *io;
> struct crypt_config *cc;
>
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> cc = ti->private;
> bio->bi_bdev = cc->dev->bdev;
> return DM_MAPIO_REMAPPED;
> diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
> index 0590c75..136d4f7 100644
> --- a/drivers/md/dm-io.c
> +++ b/drivers/md/dm-io.c
> @@ -31,7 +31,6 @@ struct dm_io_client {
> */
> struct io {
> unsigned long error_bits;
> - unsigned long eopnotsupp_bits;
> atomic_t count;
> struct task_struct *sleeper;
> struct dm_io_client *client;
> @@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
> *---------------------------------------------------------------*/
> static void dec_count(struct io *io, unsigned int region, int error)
> {
> - if (error) {
> + if (error)
> set_bit(region, &io->error_bits);
> - if (error == -EOPNOTSUPP)
> - set_bit(region, &io->eopnotsupp_bits);
> - }
>
> if (atomic_dec_and_test(&io->count)) {
> if (io->sleeper)
> @@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
> sector_t remaining = where->count;
>
> /*
> - * where->count may be zero if rw holds a write barrier and we
> - * need to send a zero-sized barrier.
> + * where->count may be zero if rw holds a flush and we need to
> + * send a zero-sized flush.
> */
> do {
> /*
> @@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
> */
> for (i = 0; i < num_regions; i++) {
> *dp = old_pages;
> - if (where[i].count || (rw & REQ_HARDBARRIER))
> + if (where[i].count || (rw & REQ_FLUSH))
> do_region(rw, i, where + i, dp, io);
> }
>
> @@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
> return -EIO;
> }
>
> -retry:
> io->error_bits = 0;
> - io->eopnotsupp_bits = 0;
> atomic_set(&io->count, 1); /* see dispatch_io() */
> io->sleeper = current;
> io->client = client;
> @@ -412,11 +406,6 @@ retry:
> }
> set_current_state(TASK_RUNNING);
>
> - if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
> - rw &= ~REQ_HARDBARRIER;
> - goto retry;
> - }
> -
> if (error_bits)
> *error_bits = io->error_bits;
>
> @@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
>
> io = mempool_alloc(client->pool, GFP_NOIO);
> io->error_bits = 0;
> - io->eopnotsupp_bits = 0;
> atomic_set(&io->count, 1); /* see dispatch_io() */
> io->sleeper = NULL;
> io->client = client;
> diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
> index 5a08be0..33420e6 100644
> --- a/drivers/md/dm-log.c
> +++ b/drivers/md/dm-log.c
> @@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
> .count = 0,
> };
>
> - lc->io_req.bi_rw = WRITE_BARRIER;
> + lc->io_req.bi_rw = WRITE_FLUSH;
>
> return dm_io(&lc->io_req, 1, &null_location, NULL);
> }
> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
> index 7c081bc..19a59b0 100644
> --- a/drivers/md/dm-raid1.c
> +++ b/drivers/md/dm-raid1.c
> @@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
> struct dm_io_region io[ms->nr_mirrors];
> struct mirror *m;
> struct dm_io_request io_req = {
> - .bi_rw = WRITE_BARRIER,
> + .bi_rw = WRITE_FLUSH,
> .mem.type = DM_IO_KMEM,
> .mem.ptr.bvec = NULL,
> .client = ms->io_client,
> @@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
> struct dm_io_region io[ms->nr_mirrors], *dest = io;
> struct mirror *m;
> struct dm_io_request io_req = {
> - .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
> + .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
> .mem.type = DM_IO_BVEC,
> .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
> .notify.fn = write_callback,
> @@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
> bio_list_init(&requeue);
>
> while ((bio = bio_list_pop(writes))) {
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> bio_list_add(&sync, bio);
> continue;
> }
> @@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
> * We need to dec pending if this was a write.
> */
> if (rw == WRITE) {
> - if (likely(!bio_empty_barrier(bio)))
> + if (!(bio->bi_rw & REQ_FLUSH))
> dm_rh_dec(ms->rh, map_context->ll);
> return error;
> }
> diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
> index bd5c58b..dad011a 100644
> --- a/drivers/md/dm-region-hash.c
> +++ b/drivers/md/dm-region-hash.c
> @@ -81,9 +81,9 @@ struct dm_region_hash {
> struct list_head failed_recovered_regions;
>
> /*
> - * If there was a barrier failure no regions can be marked clean.
> + * If there was a flush failure no regions can be marked clean.
> */
> - int barrier_failure;
> + int flush_failure;
>
> void *context;
> sector_t target_begin;
> @@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
> INIT_LIST_HEAD(&rh->quiesced_regions);
> INIT_LIST_HEAD(&rh->recovered_regions);
> INIT_LIST_HEAD(&rh->failed_recovered_regions);
> - rh->barrier_failure = 0;
> + rh->flush_failure = 0;
>
> rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
> sizeof(struct dm_region));
> @@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
> region_t region = dm_rh_bio_to_region(rh, bio);
> int recovering = 0;
>
> - if (bio_empty_barrier(bio)) {
> - rh->barrier_failure = 1;
> + if (bio->bi_rw & REQ_FLUSH) {
> + rh->flush_failure = 1;
> return;
> }
>
> @@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
> struct bio *bio;
>
> for (bio = bios->head; bio; bio = bio->bi_next) {
> - if (bio_empty_barrier(bio))
> + if (bio->bi_rw & REQ_FLUSH)
> continue;
> rh_inc(rh, dm_rh_bio_to_region(rh, bio));
> }
> @@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
> */
>
> /* do nothing for DM_RH_NOSYNC */
> - if (unlikely(rh->barrier_failure)) {
> + if (unlikely(rh->flush_failure)) {
> /*
> - * If a write barrier failed some time ago, we
> + * If a write flush failed some time ago, we
> * don't know whether or not this write made it
> * to the disk, so we must resync the device.
> */
> diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
> index cc2bdb8..0b61792 100644
> --- a/drivers/md/dm-snap-persistent.c
> +++ b/drivers/md/dm-snap-persistent.c
> @@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
> /*
> * Commit exceptions to disk.
> */
> - if (ps->valid && area_io(ps, WRITE_BARRIER))
> + if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
> ps->valid = 0;
>
> /*
> diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
> index 5974d30..eed2101 100644
> --- a/drivers/md/dm-snap.c
> +++ b/drivers/md/dm-snap.c
> @@ -1587,7 +1587,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
> chunk_t chunk;
> struct dm_snap_pending_exception *pe = NULL;
>
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> bio->bi_bdev = s->cow->bdev;
> return DM_MAPIO_REMAPPED;
> }
> @@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
> int r = DM_MAPIO_REMAPPED;
> chunk_t chunk;
>
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> if (!map_context->target_request_nr)
> bio->bi_bdev = s->origin->bdev;
> else
> @@ -2135,7 +2135,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
> struct dm_dev *dev = ti->private;
> bio->bi_bdev = dev->bdev;
>
> - if (unlikely(bio_empty_barrier(bio)))
> + if (bio->bi_rw & REQ_FLUSH)
> return DM_MAPIO_REMAPPED;
>
> /* Only tell snapshots if this is a write */
> diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
> index c297f6d..f0371b4 100644
> --- a/drivers/md/dm-stripe.c
> +++ b/drivers/md/dm-stripe.c
> @@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
> uint32_t stripe;
> unsigned target_request_nr;
>
> - if (unlikely(bio_empty_barrier(bio))) {
> + if (bio->bi_rw & REQ_FLUSH) {
> target_request_nr = map_context->target_request_nr;
> BUG_ON(target_request_nr >= sc->stripes);
> bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index b1d92be..32e6622 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -144,15 +144,16 @@ struct mapped_device {
> spinlock_t deferred_lock;
>
> /*
> - * An error from the barrier request currently being processed.
> + * An error from the flush request currently being processed.
> */
> - int barrier_error;
> + int flush_error;
>
> /*
> * Protect barrier_error from concurrent endio processing
> * in request-based dm.
> */
> spinlock_t barrier_error_lock;
> + int barrier_error;
>
> /*
> * Processing queue (flush/barriers)
> @@ -200,8 +201,8 @@ struct mapped_device {
> /* sysfs handle */
> struct kobject kobj;
>
> - /* zero-length barrier that will be cloned and submitted to targets */
> - struct bio barrier_bio;
> + /* zero-length flush that will be cloned and submitted to targets */
> + struct bio flush_bio;
> };
>
> /*
> @@ -512,7 +513,7 @@ static void end_io_acct(struct dm_io *io)
>
> /*
> * After this is decremented the bio must not be touched if it is
> - * a barrier.
> + * a flush.
> */
> dm_disk(md)->part0.in_flight[rw] = pending =
> atomic_dec_return(&md->pending[rw]);
> @@ -626,7 +627,7 @@ static void dec_pending(struct dm_io *io, int error)
> */
> spin_lock_irqsave(&md->deferred_lock, flags);
> if (__noflush_suspending(md)) {
> - if (!(io->bio->bi_rw & REQ_HARDBARRIER))
> + if (!(io->bio->bi_rw & REQ_FLUSH))
> bio_list_add_head(&md->deferred,
> io->bio);
> } else
> @@ -638,20 +639,14 @@ static void dec_pending(struct dm_io *io, int error)
> io_error = io->error;
> bio = io->bio;
>
> - if (bio->bi_rw & REQ_HARDBARRIER) {
> + if (bio->bi_rw & REQ_FLUSH) {
> /*
> - * There can be just one barrier request so we use
> + * There can be just one flush request so we use
> * a per-device variable for error reporting.
> * Note that you can't touch the bio after end_io_acct
> - *
> - * We ignore -EOPNOTSUPP for empty flush reported by
> - * underlying devices. We assume that if the device
> - * doesn't support empty barriers, it doesn't need
> - * cache flushing commands.
> */
> - if (!md->barrier_error &&
> - !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
> - md->barrier_error = io_error;
> + if (!md->flush_error)
> + md->flush_error = io_error;
> end_io_acct(io);
> free_io(md, io);
> } else {
> @@ -1119,7 +1114,7 @@ static void dm_bio_destructor(struct bio *bio)
> }
>
> /*
> - * Creates a little bio that is just does part of a bvec.
> + * Creates a little bio that just does part of a bvec.
> */
> static struct bio *split_bvec(struct bio *bio, sector_t sector,
> unsigned short idx, unsigned int offset,
> @@ -1134,7 +1129,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
>
> clone->bi_sector = sector;
> clone->bi_bdev = bio->bi_bdev;
> - clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
> + clone->bi_rw = bio->bi_rw;
> clone->bi_vcnt = 1;
> clone->bi_size = to_bytes(len);
> clone->bi_io_vec->bv_offset = offset;
> @@ -1161,7 +1156,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
>
> clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
> __bio_clone(clone, bio);
> - clone->bi_rw &= ~REQ_HARDBARRIER;
> clone->bi_destructor = dm_bio_destructor;
> clone->bi_sector = sector;
> clone->bi_idx = idx;
> @@ -1225,7 +1219,7 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
> __issue_target_request(ci, ti, request_nr, len);
> }
>
> -static int __clone_and_map_empty_barrier(struct clone_info *ci)
> +static int __clone_and_map_flush(struct clone_info *ci)
> {
> unsigned target_nr = 0;
> struct dm_target *ti;
> @@ -1289,9 +1283,6 @@ static int __clone_and_map(struct clone_info *ci)
> sector_t len = 0, max;
> struct dm_target_io *tio;
>
> - if (unlikely(bio_empty_barrier(bio)))
> - return __clone_and_map_empty_barrier(ci);
> -
> if (unlikely(bio->bi_rw & REQ_DISCARD))
> return __clone_and_map_discard(ci);
>
> @@ -1383,11 +1374,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
>
> ci.map = dm_get_live_table(md);
> if (unlikely(!ci.map)) {
> - if (!(bio->bi_rw & REQ_HARDBARRIER))
> + if (!(bio->bi_rw & REQ_FLUSH))
> bio_io_error(bio);
> else
> - if (!md->barrier_error)
> - md->barrier_error = -EIO;
> + if (!md->flush_error)
> + md->flush_error = -EIO;
> return;
> }
>
> @@ -1400,14 +1391,22 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
> ci.io->md = md;
> spin_lock_init(&ci.io->endio_lock);
> ci.sector = bio->bi_sector;
> - ci.sector_count = bio_sectors(bio);
> - if (unlikely(bio_empty_barrier(bio)))
> + if (!(bio->bi_rw & REQ_FLUSH))
> + ci.sector_count = bio_sectors(bio);
> + else {
> + /* all FLUSH bio's reaching here should be empty */
> + WARN_ON_ONCE(bio_has_data(bio));
> ci.sector_count = 1;
> + }
> ci.idx = bio->bi_idx;
>
> start_io_acct(ci.io);
> - while (ci.sector_count && !error)
> - error = __clone_and_map(&ci);
> + while (ci.sector_count && !error) {
> + if (!(bio->bi_rw & REQ_FLUSH))
> + error = __clone_and_map(&ci);
> + else
> + error = __clone_and_map_flush(&ci);
> + }
>
> /* drop the extra reference count */
> dec_pending(ci.io, error);
> @@ -1492,11 +1491,11 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
> part_stat_unlock();
>
> /*
> - * If we're suspended or the thread is processing barriers
> + * If we're suspended or the thread is processing flushes
> * we have to queue this io for later.
> */
> if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
> - unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
> + (bio->bi_rw & REQ_FLUSH)) {
> up_read(&md->io_lock);
>
> if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
> @@ -1940,6 +1939,7 @@ static void dm_init_md_queue(struct mapped_device *md)
> blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
> md->queue->unplug_fn = dm_unplug_all;
> blk_queue_merge_bvec(md->queue, dm_merge_bvec);
> + blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
> }
>
> /*
> @@ -2245,7 +2245,8 @@ static int dm_init_request_based_queue(struct mapped_device *md)
> blk_queue_softirq_done(md->queue, dm_softirq_done);
> blk_queue_prep_rq(md->queue, dm_prep_fn);
> blk_queue_lld_busy(md->queue, dm_lld_busy);
> - blk_queue_flush(md->queue, REQ_FLUSH);
> + /* no flush support for request based dm yet */
> + blk_queue_flush(md->queue, 0);
>
> elv_register_queue(md->queue);
>
> @@ -2406,41 +2407,35 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
> return r;
> }
>
> -static void dm_flush(struct mapped_device *md)
> +static void process_flush(struct mapped_device *md, struct bio *bio)
> {
> - dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> -
> - bio_init(&md->barrier_bio);
> - md->barrier_bio.bi_bdev = md->bdev;
> - md->barrier_bio.bi_rw = WRITE_BARRIER;
> - __split_and_process_bio(md, &md->barrier_bio);
> + md->flush_error = 0;
>
> + /* handle REQ_FLUSH */
> dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> -}
>
> -static void process_barrier(struct mapped_device *md, struct bio *bio)
> -{
> - md->barrier_error = 0;
> + bio_init(&md->flush_bio);
> + md->flush_bio.bi_bdev = md->bdev;
> + md->flush_bio.bi_rw = WRITE_FLUSH;
> + __split_and_process_bio(md, &md->flush_bio);
>
> - dm_flush(md);
> + dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
>
> - if (!bio_empty_barrier(bio)) {
> - __split_and_process_bio(md, bio);
> - /*
> - * If the request isn't supported, don't waste time with
> - * the second flush.
> - */
> - if (md->barrier_error != -EOPNOTSUPP)
> - dm_flush(md);
> + /* if it's an empty flush or the preflush failed, we're done */
> + if (!bio_has_data(bio) || md->flush_error) {
> + if (md->flush_error != DM_ENDIO_REQUEUE)
> + bio_endio(bio, md->flush_error);
> + else {
> + spin_lock_irq(&md->deferred_lock);
> + bio_list_add_head(&md->deferred, bio);
> + spin_unlock_irq(&md->deferred_lock);
> + }
> + return;
> }
>
> - if (md->barrier_error != DM_ENDIO_REQUEUE)
> - bio_endio(bio, md->barrier_error);
> - else {
> - spin_lock_irq(&md->deferred_lock);
> - bio_list_add_head(&md->deferred, bio);
> - spin_unlock_irq(&md->deferred_lock);
> - }
> + /* issue data + REQ_FUA */
> + bio->bi_rw &= ~REQ_FLUSH;
> + __split_and_process_bio(md, bio);
> }
>
> /*
> @@ -2469,8 +2464,8 @@ static void dm_wq_work(struct work_struct *work)
> if (dm_request_based(md))
> generic_make_request(c);
> else {
> - if (c->bi_rw & REQ_HARDBARRIER)
> - process_barrier(md, c);
> + if (c->bi_rw & REQ_FLUSH)
> + process_flush(md, c);
> else
> __split_and_process_bio(md, c);
> }
> --
> 1.7.1
>
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 10.09.2010 20:46:52 by Mike Snitzer
On Fri, Sep 10 2010 at 2:25pm -0400,
Mikulas Patocka wrote:
> I quite disagree with the patch. It changes too many things.
>
> What I said and what I want to see:
>
> Take dm code as is. Treat FLUSH requests as empty barriers.
>
>
> So I want to see a patch that only changes:
>
> bio_empty_barrier(bio) -> bio->bi_rw & REQ_FLUSH
> WRITE_BARRIER -> WRITE_FLUSH
> etc.
>
> so that the code compiles and works.
>
> DON'T CHANGE ANYTHING ELSE.
>
> Requirements of flushes are a subset of the requirements of barriers, so if
> you send a flush and it is treated as a barrier inside DM, there's no
> problem. The DM code that I wrote only sends out zero-data barriers and
> already treats them as flushes (it doesn't rely on ordering), so there's no
> problem with the sent requests either.
>
> Once flushes get into the kernel, I'll clean it up to allow parallel flushes
> and requests, etc. But not before. I don't want to work on an interface
> that is under development and may be changed.
Mikulas,
I agree that it is unfortunate that we're having to explore this level
of change to DM's flush support. Especially given how recently your
barrier code was added.
But the work has already been done. Rather than putting up artificial
barriers (no pun intended) it'd be great if you took the time to just
review the changes.
The patch header enumerates and describes the various changes quite
clearly.
And in fact, this first patch basically is as minimal as it gets
relative to bio-based DM's conversion to FLUSH+FUA.
Please direct your energy and talent in a positive way rather than
starting a potential flame.
Thanks,
Mike
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 10.09.2010 21:05:23 by Mikulas Patocka
On Fri, 10 Sep 2010, Mike Snitzer wrote:
> On Fri, Sep 10 2010 at 2:25pm -0400,
> Mikulas Patocka wrote:
>
> > I quite disagree with the patch. It changes too many things.
> >
> > What I said and what I want to see:
> >
> > Take dm code as is. Treat FLUSH requests as empty barriers.
> >
> >
> > So I want to see a patch that only changes:
> >
> > bio_empty_barrier(bio) -> bio->bi_rw & REQ_FLUSH
> > WRITE_BARRIER -> WRITE_FLUSH
> > etc.
> >
> > so that the code compiles and works.
> >
> > DON'T CHANGE ANYTHING ELSE.
> >
> > Requirements of flushes are a subset of the requirements of barriers, so if
> > you send a flush and it is treated as a barrier inside DM, there's no
> > problem. The DM code that I wrote only sends out zero-data barriers and
> > already treats them as flushes (it doesn't rely on ordering), so there's no
> > problem with the sent requests either.
> >
> > Once flushes get into the kernel, I'll clean it up to allow parallel flushes
> > and requests, etc. But not before. I don't want to work on an interface
> > that is under development and may be changed.
>
> Mikulas,
>
> I agree that it is unfortunate that we're having to explore this level
> of change to DM's flush support. Especially given how recently your
> barrier code was added.
>
> But the work has already been done. Rather than putting up artificial
> barriers (no pun intended) it'd be great if you took the time to just
> review the changes.
>
> The patch header enumerates and describes the various changes quite
> clearly.
>
> And in fact, this first patch basically is as minimal as it gets
> relative to bio-based DM's conversion to FLUSH+FUA.
>
> Please direct your energy and talent in a positive way rather than
> starting a potential flame.
>
> Thanks,
> Mike
I don't want to flame. I mean this:
* person X writes a patch P.
* person Y reads P, sees that the condition C is true and writes patch Q
that depends on condition C.
* person X changes a patch P, so that the patch is correct but condition C
is no longer true.
Now, there is a bug in the patch Q and NEITHER X NOR Y can find out about
that bug.
That's why parallel development doesn't work.
If you develop on things in the kernel, it is different.
* person X writes a patch P and puts it in the kernel.
* person Y reads the kernel code, sees that the condition C is true and
writes a patch Q that assumes that the condition C is true. He puts this
patch to the kernel too.
* person X wants to change his code so that the condition C isn't true,
but it is now his responsibility to search the rest of the kernel to see
if it depends on the condition C. He searches the code and finds Q.
This is not a flamewar, just a technical explanation, why I don't want to
develop on interfaces that are not in the kernel.
Mikulas
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 10.09.2010 21:24:04 by Mike Snitzer
On Fri, Sep 10 2010 at 3:05pm -0400,
Mikulas Patocka wrote:
>
>
> On Fri, 10 Sep 2010, Mike Snitzer wrote:
> > And in fact, this first patch basically is as minimal as it gets
> > relative to bio-based DM's conversion to FLUSH+FUA.
> >
> > Please direct your energy and talent in a positive way rather than
> > starting a potential flame.
> >
> > Thanks,
> > Mike
>
> I don't want to flame. I mean this:
>
> * person X writes a patch P.
> * person Y reads P, sees that the condition C is true and writes patch Q
> that depends on condition C.
> * person X changes a patch P, so that the patch is correct but condition C
> is no longer true.
>
> Now, there is a bug in the patch Q and NEITHER X NOR Y can find out about
> that bug.
>
> That's why parallel development doesn't work.
>
> If you develop on things in the kernel, it is different.
> * person X writes a patch P and puts it in the kernel.
> * person Y reads the kernel code, sees that the condition C is true and
> writes a patch Q that assumes that the condition C is true. He puts this
> patch to the kernel too.
> * person X wants to change his code so that the condition C isn't true,
> but it is now his responsibility to search the rest of the kernel to see
> if it depends on the condition C. He searches the code and finds Q.
>
> This is not a flamewar, just a technical explanation, why I don't want to
> develop on interfaces that are not in the kernel.
We're reasonable people and can certainly prevent a flamewar but what
you're doing is an elaborate distraction. The energy it took you to
write and reason through your logic above could've been used to just
review the DM FLUSH+FUA patches.
The various interfaces are hardened now and staged for inclusion in
2.6.37. Jens has already pulled the entire 40+ patchset into his
for-next branch for wider linux-next testing.
Tejun, Christoph and others have done an amazing job with this
conversion. The fact that Tejun tackled the heavy lifting of DM's
conversion was unexpected but 100% appreciated (by me and I assume
others). Please don't dismiss, or misrepresent the status of, this
FLUSH+FUA work.
Mike
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 10.09.2010 22:06:01 by Mikulas Patocka
On Fri, 10 Sep 2010, Mike Snitzer wrote:
> On Fri, Sep 10 2010 at 3:05pm -0400,
> Mikulas Patocka wrote:
>
> >
> >
> > On Fri, 10 Sep 2010, Mike Snitzer wrote:
> > > And in fact, this first patch basically is as minimal as it gets
> > > relative to bio-based DM's conversion to FLUSH+FUA.
> > >
> > > Please direct your energy and talent in a positive way rather than
> > > starting a potential flame.
> > >
> > > Thanks,
> > > Mike
> >
> > I don't want to flame. I mean this:
> >
> > * person X writes a patch P.
> > * person Y reads P, sees that the condition C is true and writes patch Q
> > that depends on condition C.
> > * person X changes a patch P, so that the patch is correct but condition C
> > is no longer true.
> >
> > Now, there is a bug in the patch Q and NEITHER X NOR Y can find out about
> > that bug.
> >
> > That's why parallel development doesn't work.
> >
> > If you develop on things in the kernel, it is different.
> > * person X writes a patch P and puts it in the kernel.
> > * person Y reads the kernel code, sees that the condition C is true and
> > writes a patch Q that assumes that the condition C is true. He puts this
> > patch to the kernel too.
> > * person X wants to change his code so that the condition C isn't true,
> > but it is now his responsibility to search the rest of the kernel to see
> > if it depends on the condition C. He searches the code and finds Q.
> >
> > This is not a flamewar, just a technical explanation, why I don't want to
> > develop on interfaces that are not in the kernel.
>
> We're reasonable people and can certainly prevent a flamewar but what
> you're doing is an elaborate distraction. The energy it took you to
> write and reason through your logic above could've been used to just
> review the DM FLUSH+FUA patches.
No. If I reviewed all 40 patches properly, it would take a long time (the
previous 2-line patch that I reviewed took me a week --- but I found a flaw
that the other people who reviewed it quickly had missed). So I reviewed
only the "dm" patch and found that it is too big.
Make a smaller patch with the barrier -> FLUSH logic only. Then you can
make additional patches with function/variable renames or logic changes.
> The various interfaces are hardened now and staged for inclusion in
> 2.6.37. Jens has already pulled the entire 40+ patchset into his
> for-next branch for wider linux-next testing.
>
> Tejun, Christoph and others have done an amazing job with this
> conversion. The fact that Tejun tackled the heavy lifting of DM's
> conversion was unexpected but 100% appreciated (by me and I assume
> others). Please don't dismiss, or misrepresent the status of, this
> FLUSH+FUA work.
I am not dismissing anything. I agree with barrier -> flush change. It
simplifies things a lot.
But I have my work rules that I learned: I use no git kernels and no
external patches (except Alasdair's patchset that I want to test). I only
use -rc or final kernels. I need a stable computer --- I don't want to
solve problems like "does it crash because I pulled something or does it
crash because I made a bug in my code?" So, put that into 2.6.37-rc1 and
I'll optimize flushes in dm for -rc2 or -rc3.
Mikulas
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 11.09.2010 01:36:21 by Tejun Heo
On 09/10/2010 10:06 PM, Mikulas Patocka wrote:
> But I have my work rules that I learned: I use no git kernels and no
> external patches (except Alasdair's patchset that I want to test). I only
> use -rc or final kernels. I need a stable computer --- I don't want to
> solve problems like "does it crash because I pulled something or does it
> crash because I made a bug in my code?" So, put that into 2.6.37-rc1 and
> I'll optimize flushes in dm for -rc2 or -rc3.
Alright, I'm sorry but this is as far as I would go for dm conversion
patches. If you wanna split it further or do it your way, please feel
free to. I think it would be beneficial to do things now but, hey,
you guys are maintaining dm part of the kernel, so it's up to you
guys. But, I think it would be silly for everyone else to wait for
the rather special requirement for dm, so if we have to go forward
without dm updates, I suppose we will have to. Jens, please feel free
to drop dm conversion patches.
Thanks.
--
tejun
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 11.09.2010 03:46:12 by Mike Snitzer
On Fri, Sep 10 2010 at 7:36pm -0400,
Tejun Heo wrote:
> On 09/10/2010 10:06 PM, Mikulas Patocka wrote:
> > But I have my work rules that I learned: I use no git kernels and no
> > external patches (except Alasdair's patchset that I want to test). I only
> > use -rc or final kernels. I need a stable computer --- I don't want to
> > solve problems like "does it crash because I pulled something or does it
> > crash because I made a bug in my code?" So, put that into 2.6.37-rc1 and
> > I'll optimize flushes in dm for -rc2 or -rc3.
>
> Alright, I'm sorry but this is as far as I would go for dm conversion
> patches. If you wanna split it further or do it your way, please feel
> free to. I think it would be beneficial to do things now but, hey,
> you guys are maintaining dm part of the kernel, so it's up to you
> guys. But, I think it would be silly for everyone else to wait for
> the rather special requirement for dm, so if we have to go forward
> without dm updates, I suppose we will have to. Jens, please feel free
> to drop dm conversion patches.
Tejun,
Mikulas doesn't speak for Alasdair or the rest of the DM developers. He
speaks for himself. He, like me, is a member of the team that helps
maintain DM. But Alasdair is the upstream DM maintainer.
Please don't latch on to Mikulas' disruptive stone-walling. As I shared
in my previous reply: your FLUSH+FUA contributions to DM are very much
appreciated! Kiyoshi, Jun'ichi and I have all worked with you
effectively, and so far the resulting DM conversion has proven quite
stable and correct. Wider testing via linux-next is an important next
step.
Jens, please don't drop the DM FLUSH+FUA conversion patches from your
'for-next' branch. Mikulas has yet to offer a single substantive
criticism of the code in question.
Alasdair, please advise ASAP.
Thanks,
Mike
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 11.09.2010 14:19:33 by Ric Wheeler
On 09/10/2010 07:36 PM, Tejun Heo wrote:
> On 09/10/2010 10:06 PM, Mikulas Patocka wrote:
>> But I have my work rules that I learned: I use no git kernels and no
>> external patches (except Alasdair's patchset that I want to test). I only
>> use -rc or final kernels. I need a stable computer --- I don't want to
>> solve problems like "does it crash because I pulled something or does it
>> crash because I made a bug in my code?" So, put that into 2.6.37-rc1 and
>> I'll optimize flushes in dm for -rc2 or -rc3.
> Alright, I'm sorry but this is as far as I would go for dm conversion
> patches. If you wanna split it further or do it your way, please feel
> free to. I think it would be beneficial to do things now but, hey,
> you guys are maintaining dm part of the kernel, so it's up to you
> guys. But, I think it would be silly for everyone else to wait for
> the rather special requirement for dm, so if we have to go forward
> without dm updates, I suppose we will have to. Jens, please feel free
> to drop dm conversion patches.
>
> Thanks.
>
I think that we certainly want to continue to fix the whole path - including
dm - so we do not end up with the fragmented "works here/doesn't work there"
situation we had for several years. If Mikulas has better patches, he should
post them for review.
Thanks!
Ric
Re: [PATCH 23/41] dm: implement REQ_FLUSH/FUA support for bio-based dm
on 13.09.2010 21:01:38 by Mikulas Patocka
On Sat, 11 Sep 2010, Tejun Heo wrote:
> On 09/10/2010 10:06 PM, Mikulas Patocka wrote:
> > But I have my work rules that I learned: I use no git kernels and no
> > external patches (except Alasdair's patchset that I want to test). I only
> > use -rc or final kernels. I need a stable computer --- I don't want to
> > solve problems like "does it crash because I pulled something or does it
> > crash because I made a bug in my code?" So, put that into 2.6.37-rc1 and
> > I'll optimize flushes in dm for -rc2 or -rc3.
>
> Alright, I'm sorry but this is as far as I would go for dm conversion
> patches. If you wanna split it further or do it your way, please feel
> free to. I think it would be beneficial to do things now but, hey,
> you guys are maintaining dm part of the kernel, so it's up to you
> guys. But, I think it would be silly for everyone else to wait for
> the rather special requirement for dm, so if we have to go forward
> without dm updates, I suppose we will have to. Jens, please feel free
> to drop dm conversion patches.
>
> Thanks.
> --
> tejun
I think: take the first patch and submit it on your own, and send the rest
of the patches to Alasdair for his tree. If I get such big patches, I am
unable to immediately say whether they are correct or not.
Mikulas