diff -purN linux-2.6.24.2/block/ll_rw_blk.c linux-2.6.24.2-new/block/ll_rw_blk.c --- linux-2.6.24.2/block/ll_rw_blk.c 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/block/ll_rw_blk.c 2008-06-02 16:23:00.000000000 -0700 @@ -3210,9 +3210,9 @@ static inline int bio_check_eod(struct b */ static inline void __generic_make_request(struct bio *bio) { - struct request_queue *q; + struct request_queue *q = bdev_get_queue(bio->bi_bdev); sector_t old_sector; - int ret, nr_sectors = bio_sectors(bio); + int nr_sectors = bio_sectors(bio); dev_t old_dev; int err = -EIO; @@ -3221,6 +3221,13 @@ static inline void __generic_make_reques if (bio_check_eod(bio, nr_sectors)) goto end_io; + if (q && q->metric && !bio->bi_queue) { + int need = bio->bi_max_vecs; + bio->bi_queue = q; + /* FIXME: potential race if atomic_sub is called in the middle of condition check */ + wait_event(q->throttle_wait, atomic_read(&q->available) >= need); + atomic_sub(need, &q->available); + } /* * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking @@ -3231,10 +3238,9 @@ static inline void __generic_make_reques */ old_sector = -1; old_dev = 0; - do { + while (1) { char b[BDEVNAME_SIZE]; - q = bdev_get_queue(bio->bi_bdev); if (!q) { printk(KERN_ERR "generic_make_request: Trying to access " @@ -3282,8 +3288,10 @@ end_io: goto end_io; } - ret = q->make_request_fn(q, bio); - } while (ret); + if (!q->make_request_fn(q, bio)) + return; + q = bdev_get_queue(bio->bi_bdev); + } } /* diff -purN linux-2.6.24.2/drivers/md/dm.c linux-2.6.24.2-new/drivers/md/dm.c --- linux-2.6.24.2/drivers/md/dm.c 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/drivers/md/dm.c 2008-06-02 16:32:05.000000000 -0700 @@ -22,6 +22,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -130,6 +131,43 @@ struct mapped_device { #define MIN_IOS 256 static struct kmem_cache *_io_cache; static struct kmem_cache *_tio_cache; +static int sysctl_bio_throttle = 0; + +static ctl_table dm_table[] = { + { + .ctl_name = DEV_DM_BIO_THROTTLE, + .procname = "bio_throttle", + .data = &sysctl_bio_throttle, + .maxlen = sizeof(int), + .mode = S_IRUGO|S_IWUSR, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table dm_dir_table[] = { + { + .ctl_name = DEV_DM, + .procname = "dm", + .maxlen = 0, + .mode = S_IRUGO|S_IXUGO, + .child = dm_table, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *dm_table_header; + +static ctl_table dm_root_table[] = { + { + .ctl_name = CTL_DEV, + .procname = "dev", + .maxlen = 0, + .mode = 0555, + .child = dm_dir_table, + }, + { .ctl_name = 0 } +}; static int __init local_init(void) { @@ -209,6 +247,7 @@ static int __init dm_init(void) goto bad; } + dm_table_header = register_sysctl_table(dm_root_table); return 0; bad: @@ -224,6 +263,7 @@ static void __exit dm_exit(void) while (i--) _exits[i](); + unregister_sysctl_table(dm_table_header); } /* @@ -809,6 +849,11 @@ static int __split_bio(struct mapped_dev * CRUD END *---------------------------------------------------------------*/ +static unsigned dm_metric(struct bio *bio) +{ + return 1; +} + /* * The request function that just remaps the bio built up by * dm_merge_bvec. @@ -967,6 +1012,7 @@ out: static struct block_device_operations dm_blk_dops; +#define DEFAULT_THROTTLE_CAPACITY 1000 /* * Allocate and initialise a blank device with a given minor. */ @@ -1009,6 +1055,13 @@ static struct mapped_device *alloc_dev(i goto bad1_free_minor; md->queue->queuedata = md; + if (sysctl_bio_throttle == 1) { + md->queue->metric = dm_metric; + /* A dm device constructor may change the throttle capacity */ + atomic_set(&md->queue->available, md->queue->capacity = DEFAULT_THROTTLE_CAPACITY); + init_waitqueue_head(&md->queue->throttle_wait); + } + md->queue->backing_dev_info.congested_fn = dm_any_congested; md->queue->backing_dev_info.congested_data = md; blk_queue_make_request(md->queue, dm_request); @@ -1571,6 +1624,7 @@ int dm_suspended(struct mapped_device *m { return test_bit(DMF_SUSPENDED, &md->flags); } +EXPORT_SYMBOL_GPL(dm_suspended); int dm_noflush_suspending(struct dm_target *ti) { diff -purN linux-2.6.24.2/fs/bio.c linux-2.6.24.2-new/fs/bio.c --- linux-2.6.24.2/fs/bio.c 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/fs/bio.c 2008-06-02 16:23:00.000000000 -0700 @@ -1007,6 +1007,13 @@ void bio_endio(struct bio *bio, int erro else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; + if (bio->bi_queue) { + struct request_queue *q = bio->bi_queue; + atomic_add(bio->bi_max_vecs, &q->available); + bio->bi_queue = NULL; /* 0xdeadbeef? */ + wake_up(&q->throttle_wait); + } + if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff -purN linux-2.6.24.2/include/linux/bio.h linux-2.6.24.2-new/include/linux/bio.h --- linux-2.6.24.2/include/linux/bio.h 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/include/linux/bio.h 2008-06-02 16:23:00.000000000 -0700 @@ -111,6 +111,7 @@ struct bio { bio_end_io_t *bi_end_io; atomic_t bi_cnt; /* pin count */ + struct request_queue *bi_queue; /* for throttling */ void *bi_private; bio_destructor_t *bi_destructor; /* destructor */ diff -purN linux-2.6.24.2/include/linux/blkdev.h linux-2.6.24.2-new/include/linux/blkdev.h --- linux-2.6.24.2/include/linux/blkdev.h 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/include/linux/blkdev.h 2008-06-02 16:23:00.000000000 -0700 @@ -384,6 +384,10 @@ struct request_queue struct work_struct unplug_work; struct backing_dev_info backing_dev_info; + unsigned (*metric)(struct bio *bio); /* (stub) bio throttle metric */ + wait_queue_head_t throttle_wait; + atomic_t available; + unsigned capacity; /* * The queue owner gets to use this for whatever they like. diff -purN linux-2.6.24.2/include/linux/sysctl.h linux-2.6.24.2-new/include/linux/sysctl.h --- linux-2.6.24.2/include/linux/sysctl.h 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/include/linux/sysctl.h 2008-06-02 16:24:11.000000000 -0700 @@ -859,6 +859,7 @@ enum { DEV_MAC_HID=5, DEV_SCSI=6, DEV_IPMI=7, + DEV_DM=8 }; /* /proc/sys/dev/cdrom */ @@ -882,6 +883,11 @@ enum { DEV_RAID_SPEED_LIMIT_MAX=2 }; +/* /proc/sys/dev/dm */ +enum { + DEV_DM_BIO_THROTTLE = 1 +}; + /* /proc/sys/dev/parport/default */ enum { DEV_PARPORT_DEFAULT_TIMESLICE=1, diff -purN linux-2.6.24.2/kernel/sysctl_check.c linux-2.6.24.2-new/kernel/sysctl_check.c --- linux-2.6.24.2/kernel/sysctl_check.c 2008-02-10 21:51:11.000000000 -0800 +++ linux-2.6.24.2-new/kernel/sysctl_check.c 2008-06-02 16:24:11.000000000 -0700 @@ -875,6 +875,11 @@ static struct trans_ctl_table trans_parp {} }; +static struct trans_ctl_table trans_dm_table[] = { + { DEV_DM_BIO_THROTTLE, "bio_throttle" }, + {} +}; + static struct trans_ctl_table trans_dev_table[] = { { DEV_CDROM, "cdrom", trans_cdrom_table }, /* DEV_HWMON unused */ @@ -883,6 +888,7 @@ static struct trans_ctl_table trans_dev_ { DEV_MAC_HID, "mac_hid", trans_mac_hid_files }, { DEV_SCSI, "scsi", trans_scsi_table }, { DEV_IPMI, "ipmi", trans_ipmi_table }, + { DEV_DM, "dm", trans_dm_table }, {} };