summary | refs | log | tree | commit | diff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/md.c                                260
-rw-r--r--  drivers/md/md.h                                 28
-rw-r--r--  drivers/md/persistent-data/dm-block-manager.c    4
-rw-r--r--  drivers/md/persistent-data/dm-btree.h            2
-rw-r--r--  drivers/md/raid1.c                              15
-rw-r--r--  drivers/md/raid10.c                             15
-rw-r--r--  drivers/md/raid5.c                              57
7 files changed, 196 insertions, 185 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 61200717687..3db3d1b271f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
spin_lock_irq(&mddev->write_lock);
wait_event_lock_irq(mddev->sb_wait,
!mddev->flush_bio,
- mddev->write_lock, /*nothing*/);
+ mddev->write_lock);
mddev->flush_bio = bio;
spin_unlock_irq(&mddev->write_lock);
@@ -1414,12 +1414,11 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
unsigned long long newcsum;
int size = 256 + le32_to_cpu(sb->max_dev)*2;
__le32 *isuper = (__le32*)sb;
- int i;
disk_csum = sb->sb_csum;
sb->sb_csum = 0;
newcsum = 0;
- for (i=0; size>=4; size -= 4 )
+ for (; size >= 4; size -= 4)
newcsum += le32_to_cpu(*isuper++);
if (size == 2)
@@ -4124,7 +4123,7 @@ static struct md_sysfs_entry md_size =
__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
-/* Metdata version.
+/* Metadata version.
* This is one of
* 'none' for arrays with no metadata (good luck...)
* 'external' for arrays with externally managed metadata,
@@ -4753,6 +4752,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
}
mddev_get(mddev);
spin_unlock(&all_mddevs_lock);
+ if (entry->store == new_dev_store)
+ flush_workqueue(md_misc_wq);
rv = mddev_lock(mddev);
if (!rv) {
rv = entry->store(mddev, page, length);
@@ -6346,24 +6347,23 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
* Commands dealing with the RAID driver but not any
* particular array:
*/
- switch (cmd)
- {
- case RAID_VERSION:
- err = get_version(argp);
- goto done;
+ switch (cmd) {
+ case RAID_VERSION:
+ err = get_version(argp);
+ goto done;
- case PRINT_RAID_DEBUG:
- err = 0;
- md_print_devices();
- goto done;
+ case PRINT_RAID_DEBUG:
+ err = 0;
+ md_print_devices();
+ goto done;
#ifndef MODULE
- case RAID_AUTORUN:
- err = 0;
- autostart_arrays(arg);
- goto done;
+ case RAID_AUTORUN:
+ err = 0;
+ autostart_arrays(arg);
+ goto done;
#endif
- default:;
+ default:;
}
/*
@@ -6398,6 +6398,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto abort;
}
+ if (cmd == ADD_NEW_DISK)
+ /* need to ensure md_delayed_delete() has completed */
+ flush_workqueue(md_misc_wq);
+
err = mddev_lock(mddev);
if (err) {
printk(KERN_INFO
@@ -6406,50 +6410,44 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto abort;
}
- switch (cmd)
- {
- case SET_ARRAY_INFO:
- {
- mdu_array_info_t info;
- if (!arg)
- memset(&info, 0, sizeof(info));
- else if (copy_from_user(&info, argp, sizeof(info))) {
- err = -EFAULT;
- goto abort_unlock;
- }
- if (mddev->pers) {
- err = update_array_info(mddev, &info);
- if (err) {
- printk(KERN_WARNING "md: couldn't update"
- " array info. %d\n", err);
- goto abort_unlock;
- }
- goto done_unlock;
- }
- if (!list_empty(&mddev->disks)) {
- printk(KERN_WARNING
- "md: array %s already has disks!\n",
- mdname(mddev));
- err = -EBUSY;
- goto abort_unlock;
- }
- if (mddev->raid_disks) {
- printk(KERN_WARNING
- "md: array %s already initialised!\n",
- mdname(mddev));
- err = -EBUSY;
- goto abort_unlock;
- }
- err = set_array_info(mddev, &info);
- if (err) {
- printk(KERN_WARNING "md: couldn't set"
- " array info. %d\n", err);
- goto abort_unlock;
- }
+ if (cmd == SET_ARRAY_INFO) {
+ mdu_array_info_t info;
+ if (!arg)
+ memset(&info, 0, sizeof(info));
+ else if (copy_from_user(&info, argp, sizeof(info))) {
+ err = -EFAULT;
+ goto abort_unlock;
+ }
+ if (mddev->pers) {
+ err = update_array_info(mddev, &info);
+ if (err) {
+ printk(KERN_WARNING "md: couldn't update"
+ " array info. %d\n", err);
+ goto abort_unlock;
}
goto done_unlock;
-
- default:;
+ }
+ if (!list_empty(&mddev->disks)) {
+ printk(KERN_WARNING
+ "md: array %s already has disks!\n",
+ mdname(mddev));
+ err = -EBUSY;
+ goto abort_unlock;
+ }
+ if (mddev->raid_disks) {
+ printk(KERN_WARNING
+ "md: array %s already initialised!\n",
+ mdname(mddev));
+ err = -EBUSY;
+ goto abort_unlock;
+ }
+ err = set_array_info(mddev, &info);
+ if (err) {
+ printk(KERN_WARNING "md: couldn't set"
+ " array info. %d\n", err);
+ goto abort_unlock;
+ }
+ goto done_unlock;
}
/*
@@ -6468,52 +6466,51 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
/*
* Commands even a read-only array can execute:
*/
- switch (cmd)
- {
- case GET_BITMAP_FILE:
- err = get_bitmap_file(mddev, argp);
- goto done_unlock;
+ switch (cmd) {
+ case GET_BITMAP_FILE:
+ err = get_bitmap_file(mddev, argp);
+ goto done_unlock;
- case RESTART_ARRAY_RW:
- err = restart_array(mddev);
- goto done_unlock;
+ case RESTART_ARRAY_RW:
+ err = restart_array(mddev);
+ goto done_unlock;
- case STOP_ARRAY:
- err = do_md_stop(mddev, 0, bdev);
- goto done_unlock;
+ case STOP_ARRAY:
+ err = do_md_stop(mddev, 0, bdev);
+ goto done_unlock;
- case STOP_ARRAY_RO:
- err = md_set_readonly(mddev, bdev);
- goto done_unlock;
+ case STOP_ARRAY_RO:
+ err = md_set_readonly(mddev, bdev);
+ goto done_unlock;
- case BLKROSET:
- if (get_user(ro, (int __user *)(arg))) {
- err = -EFAULT;
- goto done_unlock;
- }
- err = -EINVAL;
+ case BLKROSET:
+ if (get_user(ro, (int __user *)(arg))) {
+ err = -EFAULT;
+ goto done_unlock;
+ }
+ err = -EINVAL;
- /* if the bdev is going readonly the value of mddev->ro
- * does not matter, no writes are coming
- */
- if (ro)
- goto done_unlock;
+ /* if the bdev is going readonly the value of mddev->ro
+ * does not matter, no writes are coming
+ */
+ if (ro)
+ goto done_unlock;
- /* are we are already prepared for writes? */
- if (mddev->ro != 1)
- goto done_unlock;
+ /* are we are already prepared for writes? */
+ if (mddev->ro != 1)
+ goto done_unlock;
- /* transitioning to readauto need only happen for
- * arrays that call md_write_start
- */
- if (mddev->pers) {
- err = restart_array(mddev);
- if (err == 0) {
- mddev->ro = 2;
- set_disk_ro(mddev->gendisk, 0);
- }
+ /* transitioning to readauto need only happen for
+ * arrays that call md_write_start
+ */
+ if (mddev->pers) {
+ err = restart_array(mddev);
+ if (err == 0) {
+ mddev->ro = 2;
+ set_disk_ro(mddev->gendisk, 0);
}
- goto done_unlock;
+ }
+ goto done_unlock;
}
/*
@@ -6535,37 +6532,36 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
}
- switch (cmd)
+ switch (cmd) {
+ case ADD_NEW_DISK:
{
- case ADD_NEW_DISK:
- {
- mdu_disk_info_t info;
- if (copy_from_user(&info, argp, sizeof(info)))
- err = -EFAULT;
- else
- err = add_new_disk(mddev, &info);
- goto done_unlock;
- }
+ mdu_disk_info_t info;
+ if (copy_from_user(&info, argp, sizeof(info)))
+ err = -EFAULT;
+ else
+ err = add_new_disk(mddev, &info);
+ goto done_unlock;
+ }
- case HOT_REMOVE_DISK:
- err = hot_remove_disk(mddev, new_decode_dev(arg));
- goto done_unlock;
+ case HOT_REMOVE_DISK:
+ err = hot_remove_disk(mddev, new_decode_dev(arg));
+ goto done_unlock;
- case HOT_ADD_DISK:
- err = hot_add_disk(mddev, new_decode_dev(arg));
- goto done_unlock;
+ case HOT_ADD_DISK:
+ err = hot_add_disk(mddev, new_decode_dev(arg));
+ goto done_unlock;
- case RUN_ARRAY:
- err = do_md_run(mddev);
- goto done_unlock;
+ case RUN_ARRAY:
+ err = do_md_run(mddev);
+ goto done_unlock;
- case SET_BITMAP_FILE:
- err = set_bitmap_file(mddev, (int)arg);
- goto done_unlock;
+ case SET_BITMAP_FILE:
+ err = set_bitmap_file(mddev, (int)arg);
+ goto done_unlock;
- default:
- err = -EINVAL;
- goto abort_unlock;
+ default:
+ err = -EINVAL;
+ goto abort_unlock;
}
done_unlock:
@@ -7184,6 +7180,7 @@ void md_done_sync(struct mddev *mddev, int blocks, int ok)
wake_up(&mddev->recovery_wait);
if (!ok) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
md_wakeup_thread(mddev->thread);
// stop recovery, signal do_sync ....
}
@@ -7281,6 +7278,7 @@ EXPORT_SYMBOL_GPL(md_allow_write);
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
+#define UPDATE_FREQUENCY (5*60*HZ)
void md_do_sync(struct md_thread *thread)
{
struct mddev *mddev = thread->mddev;
@@ -7289,6 +7287,7 @@ void md_do_sync(struct md_thread *thread)
window;
sector_t max_sectors,j, io_sectors;
unsigned long mark[SYNC_MARKS];
+ unsigned long update_time;
sector_t mark_cnt[SYNC_MARKS];
int last_mark,m;
struct list_head *tmp;
@@ -7448,6 +7447,7 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync_completed = j;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
md_new_event(mddev);
+ update_time = jiffies;
blk_start_plug(&plug);
while (j < max_sectors) {
@@ -7459,6 +7459,7 @@ void md_do_sync(struct md_thread *thread)
((mddev->curr_resync > mddev->curr_resync_completed &&
(mddev->curr_resync - mddev->curr_resync_completed)
> (max_sectors >> 4)) ||
+ time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
(j - mddev->curr_resync_completed)*2
>= mddev->resync_max - mddev->curr_resync_completed
)) {
@@ -7466,6 +7467,10 @@ void md_do_sync(struct md_thread *thread)
wait_event(mddev->recovery_wait,
atomic_read(&mddev->recovery_active) == 0);
mddev->curr_resync_completed = j;
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+ j > mddev->recovery_cp)
+ mddev->recovery_cp = j;
+ update_time = jiffies;
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
@@ -7570,8 +7575,13 @@ void md_do_sync(struct md_thread *thread)
printk(KERN_INFO
"md: checkpointing %s of %s.\n",
desc, mdname(mddev));
- mddev->recovery_cp =
- mddev->curr_resync_completed;
+ if (test_bit(MD_RECOVERY_ERROR,
+ &mddev->recovery))
+ mddev->recovery_cp =
+ mddev->curr_resync_completed;
+ else
+ mddev->recovery_cp =
+ mddev->curr_resync;
}
} else
mddev->recovery_cp = MaxSector;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab868d..eca59c3074e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -307,6 +307,7 @@ struct mddev {
* REQUEST: user-space has requested a sync (used with SYNC)
* CHECK: user-space request for check-only, no repair
* RESHAPE: A reshape is happening
+ * ERROR: sync-action interrupted because io-error
*
* If neither SYNC or RESHAPE are set, then it is a recovery.
*/
@@ -320,6 +321,7 @@ struct mddev {
#define MD_RECOVERY_CHECK 7
#define MD_RECOVERY_RESHAPE 8
#define MD_RECOVERY_FROZEN 9
+#define MD_RECOVERY_ERROR 10
unsigned long recovery;
/* If a RAID personality determines that recovery (of a particular
@@ -551,32 +553,6 @@ struct md_thread {
#define THREAD_WAKEUP 0
-#define __wait_event_lock_irq(wq, condition, lock, cmd) \
-do { \
- wait_queue_t __wait; \
- init_waitqueue_entry(&__wait, current); \
- \
- add_wait_queue(&wq, &__wait); \
- for (;;) { \
- set_current_state(TASK_UNINTERRUPTIBLE); \
- if (condition) \
- break; \
- spin_unlock_irq(&lock); \
- cmd; \
- schedule(); \
- spin_lock_irq(&lock); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&wq, &__wait); \
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) \
-do { \
- if (condition) \
- break; \
- __wait_event_lock_irq(wq, condition, lock, cmd); \
-} while (0)
-
static inline void safe_put_page(struct page *p)
{
if (p) put_page(p);
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index ec4cb3c58a0..28c3ed072a7 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -25,7 +25,7 @@
* may be held at once. This is just an implementation detail.
*
* ii) Recursive locking attempts are detected and return EINVAL. A stack
- * trace is also emitted for the previous lock aquisition.
+ * trace is also emitted for the previous lock acquisition.
*
* iii) Priority is given to write locks.
*/
@@ -109,7 +109,7 @@ static int __check_holder(struct block_lock *lock)
DMERR("previously held here:");
print_stack_trace(lock->traces + i, 4);
- DMERR("subsequent aquisition attempted here:");
+ DMERR("subsequent acquisition attempted here:");
t.nr_entries = 0;
t.max_entries = MAX_STACK;
t.entries = entries;
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index ae02c84410f..a2cd50441ca 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -35,7 +35,7 @@ struct dm_transaction_manager;
*/
/*
- * Infomation about the values stored within the btree.
+ * Information about the values stored within the btree.
*/
struct dm_btree_value_type {
void *context;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a0f73092176..d5bddfc4010 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
/* Wait until no block IO is waiting */
wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
- conf->resync_lock, );
+ conf->resync_lock);
/* block any new IO from starting */
conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
/* Now wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock, );
+ conf->resync_lock);
spin_unlock_irq(&conf->resync_lock);
}
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
(conf->nr_pending &&
current->bio_list &&
!bio_list_empty(current->bio_list)),
- conf->resync_lock,
- );
+ conf->resync_lock);
conf->nr_waiting--;
}
conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
- wait_event_lock_irq(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
- conf->resync_lock,
- flush_pending_writes(conf));
+ wait_event_lock_irq_cmd(conf->wait_barrier,
+ conf->nr_pending == conf->nr_queued+1,
+ conf->resync_lock,
+ flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock);
}
static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c9acbd71713..64d48249c03 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
/* Wait until no block IO is waiting (unless 'force') */
wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock, );
+ conf->resync_lock);
/* block any new IO from starting */
conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
/* Now wait for all pending IO to complete */
wait_event_lock_irq(conf->wait_barrier,
!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock, );
+ conf->resync_lock);
spin_unlock_irq(&conf->resync_lock);
}
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
(conf->nr_pending &&
current->bio_list &&
!bio_list_empty(current->bio_list)),
- conf->resync_lock,
- );
+ conf->resync_lock);
conf->nr_waiting--;
}
conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
- wait_event_lock_irq(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
- conf->resync_lock,
- flush_pending_writes(conf));
+ wait_event_lock_irq_cmd(conf->wait_barrier,
+ conf->nr_pending == conf->nr_queued+1,
+ conf->resync_lock,
+ flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a4502686e7a..19d77a02663 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,8 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <trace/events/block.h>
+
#include "md.h"
#include "raid5.h"
#include "raid0.h"
@@ -182,6 +184,8 @@ static void return_io(struct bio *return_bi)
return_bi = bi->bi_next;
bi->bi_next = NULL;
bi->bi_size = 0;
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
bi = return_bi;
}
@@ -466,7 +470,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
do {
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0 || noquiesce,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
if (!conf->inactive_blocked)
@@ -480,8 +484,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
(atomic_read(&conf->active_stripes)
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
- conf->device_lock,
- );
+ conf->device_lock);
conf->inactive_blocked = 0;
} else
init_stripe(sh, sector, previous);
@@ -671,6 +674,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_next = NULL;
if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
+ trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+ bi, disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
generic_make_request(bi);
}
if (rrdev) {
@@ -698,6 +704,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
rbi->bi_next = NULL;
+ trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+ rbi, disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
generic_make_request(rbi);
}
if (!rdev && !rrdev) {
@@ -1576,7 +1585,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
* This happens in stages:
* 1/ create a new kmem_cache and allocate the required number of
* stripe_heads.
- * 2/ gather all the old stripe_heads and tranfer the pages across
+ * 2/ gather all the old stripe_heads and transfer the pages across
* to the new stripe_heads. This will have the side effect of
* freezing the array as once all stripe_heads have been collected,
* no IO will be possible. Old stripe heads are freed once their
@@ -1646,8 +1655,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list),
- conf->device_lock,
- );
+ conf->device_lock);
osh = get_free_stripe(conf);
spin_unlock_irq(&conf->device_lock);
atomic_set(&nsh->count, 1);
@@ -2855,8 +2863,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
pr_debug("for sector %llu, rmw=%d rcw=%d\n",
(unsigned long long)sh->sector, rmw, rcw);
set_bit(STRIPE_HANDLE, &sh->state);
- if (rmw < rcw && rmw > 0)
+ if (rmw < rcw && rmw > 0) {
/* prefer read-modify-write, but need to get some data */
+ blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
+ (unsigned long long)sh->sector, rmw);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) &&
@@ -2867,7 +2877,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
pr_debug("Read_old block "
- "%d for r-m-w\n", i);
+ "%d for r-m-w\n", i);
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
@@ -2877,8 +2887,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
}
}
}
+ }
if (rcw <= rmw && rcw > 0) {
/* want reconstruct write, but need to get some data */
+ int qread =0;
rcw = 0;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
@@ -2897,12 +2909,17 @@ static void handle_stripe_dirtying(struct r5conf *conf,
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ qread++;
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
}
}
+ if (rcw)
+ blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
+ (unsigned long long)sh->sector,
+ rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
}
/* now if nothing is locked, and if we have enough data,
* we can start a write request
@@ -3224,10 +3241,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
}
/* done submitting copies, wait for them to complete */
- if (tx) {
- async_tx_ack(tx);
- dma_wait_for_async_tx(tx);
- }
+ async_tx_quiesce(&tx);
}
/*
@@ -3903,6 +3917,8 @@ static void raid5_align_endio(struct bio *bi, int error)
rdev_dec_pending(rdev, conf->mddev);
if (!error && uptodate) {
+ trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
+ raid_bi, 0);
bio_endio(raid_bi, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
@@ -4003,10 +4019,13 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
atomic_inc(&conf->active_aligned_reads);
spin_unlock_irq(&conf->device_lock);
+ trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+ align_bi, disk_devt(mddev->gendisk),
+ raid_bio->bi_sector);
generic_make_request(align_bi);
return 1;
} else {
@@ -4081,6 +4100,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
struct stripe_head *sh;
struct mddev *mddev = cb->cb.data;
struct r5conf *conf = mddev->private;
+ int cnt = 0;
if (cb->list.next && !list_empty(&cb->list)) {
spin_lock_irq(&conf->device_lock);
@@ -4095,9 +4115,11 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
smp_mb__before_clear_bit();
clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
__release_stripe(conf, sh);
+ cnt++;
}
spin_unlock_irq(&conf->device_lock);
}
+ trace_block_unplug(mddev->queue, cnt, !from_schedule);
kfree(cb);
}
@@ -4355,6 +4377,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ( rw == WRITE )
md_write_end(mddev);
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
}
}
@@ -4731,8 +4755,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
handled++;
}
remaining = raid5_dec_bi_active_stripes(raid_bio);
- if (remaining == 0)
+ if (remaining == 0) {
+ trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
+ raid_bio, 0);
bio_endio(raid_bio, 0);
+ }
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
return handled;
@@ -6095,7 +6122,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
wait_event_lock_irq(conf->wait_for_stripe,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
conf->quiesce = 1;
spin_unlock_irq(&conf->device_lock);
/* allow reshape to continue */