From b2fab5acd28ead6f0dd6c3996ba23f0ef1772f15 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:14:57 -0800
Subject: elevator: make elevator_init_fn() return 0/-errno

elevator_ops->elevator_init_fn() has a weird return value.  It returns
a void * which the caller should assign to q->elevator->elevator_data,
and a %NULL return denotes init failure.

Update it such that it returns 0/-errno and sets elevator_data directly
as necessary.  This makes the interface more conventional and eases
further cleanup.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/elevator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 7d4e0356f329..97fb2557a18c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -33,7 +33,7 @@ typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
 
-typedef void *(elevator_init_fn) (struct request_queue *);
+typedef int (elevator_init_fn) (struct request_queue *);
 typedef void (elevator_exit_fn) (struct elevator_queue *);
 
 struct elevator_ops
--
cgit v1.2.3


From d732580b4eb31553c63744a47d590f770cafb8f0 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:14:58 -0800
Subject: block: implement blk_queue_bypass_start/end()

Rename and extend elv_quiesce_start/end() to blk_queue_bypass_start/end(),
which are exported and support nesting via @q->bypass_depth.  Also add
blk_queue_bypass() to test the bypass state.

This will be further extended and used for blkio_group management.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 606cf339bb56..315db1d91bc4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -389,6 +389,8 @@ struct request_queue {
 
 	struct mutex		sysfs_lock;
 
+	int			bypass_depth;
+
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
 	int			bsg_job_size;
@@ -406,7 +408,7 @@ struct request_queue {
 #define QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
-#define QUEUE_FLAG_ELVSWITCH	6	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BYPASS	6	/* act as dumb FIFO queue */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES	8	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	9	/* complete on same CPU-group */
@@ -494,6 +496,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+#define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
--
cgit v1.2.3
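
The nesting contract of @q->bypass_depth is easiest to see as code.  The
sketch below is illustrative only -- it approximates the semantics the
patch describes, not the actual block-core implementation, and elides the
request draining step:

	/* illustrative sketch of blk_queue_bypass_start/end() nesting */
	void blk_queue_bypass_start(struct request_queue *q)
	{
		spin_lock_irq(q->queue_lock);
		if (!q->bypass_depth++)		/* 0 -> 1: enable bypass */
			queue_flag_set(QUEUE_FLAG_BYPASS, q);
		spin_unlock_irq(q->queue_lock);
		/* ... wait for in-flight requests to drain ... */
	}

	void blk_queue_bypass_end(struct request_queue *q)
	{
		spin_lock_irq(q->queue_lock);
		if (!--q->bypass_depth)		/* 1 -> 0: disable bypass */
			queue_flag_clear(QUEUE_FLAG_BYPASS, q);
		WARN_ON_ONCE(q->bypass_depth < 0);
		spin_unlock_irq(q->queue_lock);
	}

Only the outermost bypass_start flips QUEUE_FLAG_BYPASS on, and only the
matching outermost bypass_end clears it, which is what lets callers nest
freely.
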
From 923adde1be1df57cebd80c563058e503376645e8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:13 -0800
Subject: blkcg: clear all request_queues on blkcg policy [un]registrations

Keep track of all request_queues which have blkcg initialized, and turn
on bypass and invoke blkcg_clear_queue() on all of them before making
changes to blkcg policies.

This is to prepare for moving blkg management into blkcg core.  Note
that this uses more brute force than necessary.  Finer-grained shootdown
will be implemented later and, given that policy [un]registration almost
never happens on running systems (blk-throtl can't be built as a module
and cfq usually is the builtin default iosched), this shouldn't be a
problem for the time being.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 315db1d91bc4..e8c0bbd06b9a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -397,6 +397,9 @@ struct request_queue {
 	struct bsg_class_device bsg_dev;
 #endif
 
+#ifdef CONFIG_BLK_CGROUP
+	struct list_head	all_q_node;
+#endif
 #ifdef CONFIG_BLK_DEV_THROTTLING
 	/* Throttle data */
 	struct throtl_data *td;
--
cgit v1.2.3


From 4eef3049986e8397d5003916aed8cad6567a5e02 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:18 -0800
Subject: blkcg: move per-queue blkg list heads and counters to queue and blkg

Currently, specific policy implementations are responsible for
maintaining the list and number of blkgs.  This duplicates code
unnecessarily, and hinders factoring common code and providing the blkcg
API with better-defined semantics.

After this patch, request_queue hosts the list heads and counters and
blkg has list nodes for both policies.  This patch only relocates the
necessary fields; the next patch will actually move the management code
into blkcg core.

Note that request_queue->blkg_list[] and ->nr_blkgs[] are hardcoded to
have 2 elements.  This is to avoid an include dependency and will be
removed by the next patch.

This patch doesn't introduce any behavior change.

-v2: Now-unnecessary conditional on CONFIG_BLK_CGROUP_MODULE removed as
     pointed out by Vivek.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e8c0bbd06b9a..f4e35edea70f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -362,6 +362,11 @@ struct request_queue {
 	struct list_head timeout_list;
 
 	struct list_head icq_list;
+#ifdef CONFIG_BLK_CGROUP
+	/* XXX: array size hardcoded to avoid include dependency (temporary) */
+	struct list_head blkg_list[2];
+	int nr_blkgs[2];
+#endif
 
 	struct queue_limits limits;
--
cgit v1.2.3
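
The policy-change walk that the tracking above enables looks roughly like
the sketch below.  Illustrative only: blkcg_clear_queue() and the
all_q_node field are per the patches above, but the list/mutex names and
the helper are assumptions for the example.

	/* illustrative sketch; all_q_list/all_q_mutex names are assumed */
	static LIST_HEAD(all_q_list);		/* every blkcg-initialized queue */
	static DEFINE_MUTEX(all_q_mutex);

	static void blkcg_policy_change_prepare(void)
	{
		struct request_queue *q;

		mutex_lock(&all_q_mutex);
		list_for_each_entry(q, &all_q_list, all_q_node) {
			blk_queue_bypass_start(q);
			blkcg_clear_queue(q);	/* shoot down existing blkgs */
		}
		/* blkcg policies may now be changed safely */
	}
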
From 03aa264ac15637b6f98374270bcdf31400965505 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:19 -0800
Subject: blkcg: let blkcg core manage per-queue blkg list and counter

With the previous patch moving blkg list heads and counters to
request_queue and blkg, the logic to manage them in both policies is
almost identical and can be moved into blkcg core.

This patch moves the blkg link logic into blkg_lookup_create(),
implements common blkg unlink code in blkg_destroy(), and updates
blkg_destroy_all() so that it's policy specific and can skip the root
group.  The updated blkg_destroy_all() is now used both to clear the
queue for bypassing and elevator switching, and to release all blkgs on
queue exit.

This patch introduces a race window where policy [de]registration may
race against queue blkg clearing.  This can only be a problem on cfq
unload and shouldn't be a real problem in practice (and we have many
other places where this race already exists).  Future patches will
remove these unlikely races.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f4e35edea70f..b4d1d4bfc168 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -364,8 +364,8 @@ struct request_queue {
 	struct list_head icq_list;
 #ifdef CONFIG_BLK_CGROUP
 	/* XXX: array size hardcoded to avoid include dependency (temporary) */
-	struct list_head blkg_list[2];
-	int nr_blkgs[2];
+	struct list_head blkg_list;
+	int nr_blkgs;
 #endif
 
 	struct queue_limits limits;
--
cgit v1.2.3


From c875f4d0250a1f070fa26087a73bdd8f54c48100 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:22 -0800
Subject: blkcg: drop unnecessary RCU locking

Now that blkg additions / removals are always done under both q and
blkcg locks, the only places RCU locking is necessary are
blkg_lookup[_create]() for lookup w/o blkcg lock.  This patch drops
unnecessary RCU locking, replacing it with plain blkcg locking as
necessary.

* blkiocg_pre_destroy() already performs proper locking and doesn't
  need RCU.  Dropped.

* blkio_read_blkg_stats() now uses blkcg->lock instead of RCU read
  lock.  This isn't a hot path.

* The now-unnecessary synchronize_rcu() in the queue exit paths is
  removed.  This makes q->nr_blkgs unnecessary.  Dropped.

* RCU annotation on blkg->q removed.

-v2: Vivek pointed out that blkg_lookup_create() still needs to be
     called under rcu_read_lock().  Updated.

-v3: After the update, stats_lock locking in blkio_read_blkg_stats()
     shouldn't be using the _irq variant as it otherwise ends up
     enabling irqs while blkcg->lock is locked.  Fixed.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b4d1d4bfc168..33f1b29e53f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,7 +365,6 @@ struct request_queue {
 #ifdef CONFIG_BLK_CGROUP
 	/* XXX: array size hardcoded to avoid include dependency (temporary) */
 	struct list_head blkg_list;
-	int nr_blkgs;
 #endif
 
 	struct queue_limits limits;
--
cgit v1.2.3
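
The rule the -v2 note spells out reduces to a short calling-convention
sketch.  Illustrative only; the exact locking at real call sites is an
assumption here, beyond the rcu_read_lock() requirement stated above.

	/* sketch: lookup/create still runs under RCU, per the -v2 note */
	spin_lock_irq(q->queue_lock);
	rcu_read_lock();
	blkg = blkg_lookup_create(blkcg, q);	/* links the blkg itself */
	rcu_read_unlock();
	spin_unlock_irq(q->queue_lock);

Everything other than the lookup path relies on plain q/blkcg locking.
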
From 3d48749d93a3dce732dd30a14002ab90ec4355f3 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:25 -0800
Subject: block: ioc_task_link() can't fail

ioc_task_link() is used to share %current's ioc on clone.  If
%current->io_context is set, %current is guaranteed to have a refcount
on the ioc and, thus, ioc_task_link() can't fail.

Replace the error checking in ioc_task_link() with WARN_ON_ONCE() and
make it just increment refcount and nr_tasks.

-v2: Description typo fix (Vivek).

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/iocontext.h | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 1a3018063034..81a8870ac224 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -120,18 +120,12 @@ struct io_context {
 	struct work_struct release_work;
 };
 
-static inline struct io_context *ioc_task_link(struct io_context *ioc)
+static inline void ioc_task_link(struct io_context *ioc)
 {
-	/*
-	 * if ref count is zero, don't allow sharing (ioc is going away, it's
-	 * a race).
-	 */
-	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
-		atomic_inc(&ioc->nr_tasks);
-		return ioc;
-	}
-
-	return NULL;
+	WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
+	WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
+	atomic_long_inc(&ioc->refcount);
+	atomic_inc(&ioc->nr_tasks);
 }
 
 struct task_struct;
--
cgit v1.2.3


From f6e8d01bee036460e03bd4f6a79d014f98ba712e Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:26 -0800
Subject: block: add io_context->active_ref

Currently ioc->nr_tasks is used to decide two things - whether an ioc
is done issuing IOs and whether it's shared by multiple tasks.  This
patch separates out the first into ioc->active_ref, which is acquired
and released using {get|put}_io_context_active() respectively.

This will be used to associate bios with a given task.

This patch doesn't introduce any visible behavior change.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/iocontext.h | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 81a8870ac224..6f1a2608e91f 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -100,6 +100,7 @@ struct io_cq {
  */
 struct io_context {
 	atomic_long_t refcount;
+	atomic_t active_ref;
 	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
@@ -120,17 +121,34 @@ struct io_context {
 	struct work_struct release_work;
 };
 
-static inline void ioc_task_link(struct io_context *ioc)
+/**
+ * get_io_context_active - get active reference on ioc
+ * @ioc: ioc of interest
+ *
+ * Only iocs with active reference can issue new IOs.  This function
+ * acquires an active reference on @ioc.  The caller must already have an
+ * active reference on @ioc.
+ */
+static inline void get_io_context_active(struct io_context *ioc)
 {
 	WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
-	WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
+	WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0);
 	atomic_long_inc(&ioc->refcount);
+	atomic_inc(&ioc->active_ref);
+}
+
+static inline void ioc_task_link(struct io_context *ioc)
+{
+	get_io_context_active(ioc);
+
+	WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
 	atomic_inc(&ioc->nr_tasks);
 }
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
+void put_io_context_active(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
--
cgit v1.2.3
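
How the two counters pair up is clearest from a clone-path sketch.
Illustrative only: ioc_task_link() and the task_struct field are as in
the patches above, but the helper itself is hypothetical.

	/* hypothetical helper sketching the CLONE_IO sharing path */
	static void share_ioc_on_clone(struct task_struct *parent,
				       struct task_struct *child)
	{
		struct io_context *ioc = parent->io_context;

		if (ioc) {
			ioc_task_link(ioc);	/* void now -- cannot fail */
			child->io_context = ioc;
		}
	}

ioc_task_link() takes a plain ref, an active ref, and a nr_tasks count;
task exit later drops the active ref via put_io_context_active(), which
is what marks the ioc as done issuing IOs.
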
From 852c788f8365062c8a383c5a93f7f7289977cb50 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 5 Mar 2012 13:15:27 -0800
Subject: block: implement bio_associate_current()

IO scheduling and cgroup are tied to the issuing task via io_context
and cgroup of %current.  Unfortunately, there are cases where IOs need
to be routed via a different task, which makes scheduling and cgroup
limit enforcement applied completely incorrectly.

For example, all bios delayed by blk-throttle end up being issued by a
delayed work item and get assigned the io_context of the worker task
which happens to serve the work item, and get dumped to the default
block cgroup.  This is doubly confusing as bios which aren't delayed
end up in the correct cgroup, and it makes using blk-throttle and cfq
propio together impossible.

Any code which punts IO issuing to another task is affected, which is
getting more and more common (e.g. btrfs).  As both io_context and
cgroup are firmly tied to task including userland visible APIs to
manipulate them, it makes a lot of sense to match up tasks to bios.

This patch implements bio_associate_current() which associates the
specified bio with %current.  The bio will record the associated ioc
and blkcg at that point and the block layer will use the recorded ones
regardless of which task actually ends up issuing the bio.  bio release
puts the associated ioc and blkcg.

It grabs and remembers ioc and blkcg instead of the task itself because
the task may already be dead by the time the bio is issued, making ioc
and blkcg inaccessible, and those are all the block layer cares about.

elevator_set_req_fn() is updated so that the bio for which elevator
data is being allocated is available to the elevator.

This doesn't update block cgroup policies yet.  Further patches will
implement the support.

-v2: #ifdef CONFIG_BLK_CGROUP added around bio->bi_ioc dereference in
     rq_ioc() to fix build breakage.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Cc: Kent Overstreet
Signed-off-by: Jens Axboe
---
 include/linux/bio.h       |  8 ++++++++
 include/linux/blk_types.h | 10 ++++++++++
 include/linux/elevator.h  |  6 ++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 129a9c097958..692d3d5b49f5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
 extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
+#ifdef CONFIG_BLK_CGROUP
+int bio_associate_current(struct bio *bio);
+void bio_disassociate_task(struct bio *bio);
+#else	/* CONFIG_BLK_CGROUP */
+static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
+static inline void bio_disassociate_task(struct bio *bio) { }
+#endif	/* CONFIG_BLK_CGROUP */
+
 /*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4053cbd4490e..0edb65dd8edd 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -14,6 +14,8 @@ struct bio;
 struct bio_integrity_payload;
 struct page;
 struct block_device;
+struct io_context;
+struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *, int);
 typedef void (bio_destructor_t) (struct bio *);
 
@@ -66,6 +68,14 @@ struct bio {
 	bio_end_io_t		*bi_end_io;
 
 	void			*bi_private;
+#ifdef CONFIG_BLK_CGROUP
+	/*
+	 * Optional ioc and css associated with this bio.  Put on bio
+	 * release.  Read comment on top of bio_associate_current().
+	 */
+	struct io_context	*bi_ioc;
+	struct cgroup_subsys_state *bi_css;
+#endif
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	struct bio_integrity_payload *bi_integrity;  /* data integrity */
 #endif
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 97fb2557a18c..c03af7687bb4 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
-typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
+typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
+				   struct bio *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
@@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
 extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
-extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
+extern int elv_set_request(struct request_queue *q, struct request *rq,
+			   struct bio *bio, gfp_t gfp_mask);
 extern void elv_put_request(struct request_queue *, struct request *);
 extern void elv_drain_elevator(struct request_queue *);
--
cgit v1.2.3
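
The intended usage pattern deserves a sketch: the task that owns the IO
associates the bio before handing it off, and whoever eventually submits
it inherits the recorded ioc/blkcg.  Illustrative only; the punting
helper and work-item plumbing are assumptions, bio_associate_current()
is the API added above.

	/* hypothetical punting path sketching the intended usage */
	static void punt_bio(struct bio *bio, struct workqueue_struct *wq,
			     struct work_struct *work)
	{
		/* record %current's ioc and blkcg while the issuer is alive */
		bio_associate_current(bio);

		/*
		 * The worker that eventually calls submit_bio() is charged
		 * against the recorded ioc/blkcg, not its own.
		 */
		queue_work(wq, work);
	}
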
From 598971bfbdfdc8701337dc1636c7919c44699914 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Mar 2012 15:10:58 -0700
Subject: cfq: don't use icq_get_changed()

cfq caches the associated cfqqs for a given cic.  The cache needs to be
flushed if the cic's ioprio or blkcg has changed.  It is currently done
by requiring the changing action to set the respective ICQ_*_CHANGED
bit in the icq and testing it from cfq_set_request(), which involves
iterating through all the affected icqs.

All cfq wants to know is whether ioprio and/or blkcg have changed since
the last flush, which can be easily achieved by just remembering the
current ioprio and blkcg ID in the cic.

This patch adds cic->{ioprio|blkcg_id}, updates all ioprio users to use
the remembered value instead, and updates the cfq_set_request() path
such that, instead of using icq_get_changed(), the current values are
compared against the remembered ones and the appropriate flush action
is triggered if they differ.  The condition tests are moved inside both
_changed functions, which are now named check_ioprio_changed() and
check_blkcg_changed().

ioprio.h::task_ioprio*() can't be used anymore and are replaced with an
open-coded IOPRIO_CLASS_NONE case in cfq_async_queue_prio().

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/ioprio.h | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 76dad4808847..beb9ce1c2c23 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -42,26 +42,14 @@ enum {
 };
 
 /*
- * if process has set io priority explicitly, use that. if not, convert
- * the cpu scheduler nice value to an io priority
+ * Fallback BE priority
 */
 #define IOPRIO_NORM	(4)
-static inline int task_ioprio(struct io_context *ioc)
-{
-	if (ioprio_valid(ioc->ioprio))
-		return IOPRIO_PRIO_DATA(ioc->ioprio);
-
-	return IOPRIO_NORM;
-}
-
-static inline int task_ioprio_class(struct io_context *ioc)
-{
-	if (ioprio_valid(ioc->ioprio))
-		return IOPRIO_PRIO_CLASS(ioc->ioprio);
-
-	return IOPRIO_CLASS_BE;
-}
+/*
+ * if process has set io priority explicitly, use that. if not, convert
+ * the cpu scheduler nice value to an io priority
+ */
 static inline int task_nice_ioprio(struct task_struct *task)
 {
 	return (task_nice(task) + 20) / 5;
--
cgit v1.2.3
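
The compare-against-remembered-value pattern the patch describes looks
roughly like the sketch below.  Illustrative only: the cfq_io_cq layout
is abridged, and the flush helper is hypothetical.

	/* sketch of the remembered-value check replacing ICQ_*_CHANGED */
	static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
	{
		int ioprio = cic->icq.ioc->ioprio;

		if (unlikely(ioprio != cic->ioprio)) {
			/* ioprio changed since last flush: drop cached cfqqs */
			flush_cached_cfqqs(cic);	/* hypothetical helper */
			cic->ioprio = ioprio;		/* remember new value */
		}
	}

No icq iteration is needed at change time; the cost moves to a cheap
comparison on the request-setup path.
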
From 2b566fa55b9a94b53217c2818e6c5e5756eeb1a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 19 Mar 2012 15:10:59 -0700
Subject: block: remove ioc_*_changed()

After the previous patch to cfq, there's no ioc_get_changed() user
left.  This patch yanks out ioc_{ioprio|cgroup|get}_changed() and all
related stuff.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/iocontext.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 6f1a2608e91f..df38db2ef45b 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -6,11 +6,7 @@
 #include <linux/workqueue.h>
 
 enum {
-	ICQ_IOPRIO_CHANGED	= 1 << 0,
-	ICQ_CGROUP_CHANGED	= 1 << 1,
 	ICQ_EXITED		= 1 << 2,
-
-	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };
 
 /*
@@ -152,9 +148,6 @@ void put_io_context_active(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
-void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
-void ioc_cgroup_changed(struct io_context *ioc);
-unsigned int icq_get_changed(struct io_cq *icq);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
--
cgit v1.2.3


From 8bd435b30ecacb69bbb8b2d3e251f770b807c5b2 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 13 Apr 2012 13:11:28 -0700
Subject: blkcg: remove static policy ID enums

Remove the BLKIO_POLICY_* enums and let blkio_policy_register()
allocate @pol->plid dynamically on registration.  The maximum number of
blkcg policies which can be registered at the same time is defined by
the BLKCG_MAX_POLS constant added to include/linux/blkdev.h.

Note that blkio_policy_register() may now fail.  Policy init functions
are updated accordingly and unnecessary ifdefs are removed from
cfq_init().

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 33f1b29e53f4..d2c69f8c188a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -35,6 +35,12 @@ struct bsg_job;
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
+/*
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+#define BLKCG_MAX_POLS	2
+
 struct request;
 typedef void (rq_end_io_fn)(struct request *, int);
 
@@ -363,7 +369,6 @@ struct request_queue {
 
 	struct list_head icq_list;
 #ifdef CONFIG_BLK_CGROUP
-	/* XXX: array size hardcoded to avoid include dependency (temporary) */
 	struct list_head blkg_list;
 #endif
 
--
cgit v1.2.3
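
Dynamic plid allocation bounded by BLKCG_MAX_POLS amounts to a slot
scan.  The sketch below is illustrative only; the policy array name and
the error value are assumptions, not the actual registration code.

	/* sketch; blkio_policy[] name and -ENOSPC are assumed */
	static struct blkio_policy_type *blkio_policy[BLKCG_MAX_POLS];

	int blkio_policy_register(struct blkio_policy_type *pol)
	{
		int i;

		for (i = 0; i < BLKCG_MAX_POLS; i++)
			if (!blkio_policy[i])
				break;
		if (i >= BLKCG_MAX_POLS)
			return -ENOSPC;		/* registration may now fail */

		pol->plid = i;			/* plid assigned dynamically */
		blkio_policy[i] = pol;
		return 0;
	}
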
From 03d8e11142a893ad322285d3c8a08e88b570cda1 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 13 Apr 2012 13:11:32 -0700
Subject: blkcg: add request_queue->root_blkg

With per-queue policy activation, root blkg creation will be moved to
blkcg core.  Add q->root_blkg in preparation.  For blk-throtl, this
replaces throtl_data->root_tg; however, cfq needs to keep
cfqd->root_group for !CONFIG_CFQ_GROUP_IOSCHED.

This is to prepare for per-queue policy activation and doesn't cause
any functional difference.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2c69f8c188a..b01c377fd739 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -31,6 +31,7 @@ struct blk_trace;
 struct request;
 struct sg_io_hdr;
 struct bsg_job;
+struct blkio_group;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -369,6 +370,7 @@ struct request_queue {
 
 	struct list_head icq_list;
 #ifdef CONFIG_BLK_CGROUP
+	struct blkio_group *root_blkg;
 	struct list_head blkg_list;
 #endif
 
--
cgit v1.2.3


From a2b1693bac45ea3fe3ba612fd22c45f17449f610 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 13 Apr 2012 13:11:33 -0700
Subject: blkcg: implement per-queue policy activation

All blkcg policies were assumed to be enabled on all request_queues.
Due to various implementation obstacles, during the recent blkcg core
updates, this was temporarily implemented as shooting down all !root
blkgs on elevator switch and policy [de]registration combined with
half-broken in-place root blkg updates.  In addition to being buggy and
racy, this meant losing all blkcg configurations across those events.

Now that blkcg is cleaned up enough, this patch replaces the temporary
implementation with proper per-queue policy activation.  Each blkcg
policy should call the new blkcg_[de]activate_policy() to enable and
disable the policy on a specific queue.  blkcg_activate_policy()
allocates and installs policy data for the policy for all existing
blkgs.  blkcg_deactivate_policy() does the reverse.  If a policy is not
enabled for a given queue, blkg printing / config functions skip the
respective blkg for the queue.

blkcg_activate_policy() also takes care of root blkg creation, and
cfq_init_queue() and blk_throtl_init() are updated accordingly.

This makes blkcg_bypass_{start|end}() and update_root_blkg_pd()
unnecessary.  Dropped.

v2: cfq_init_queue() was returning uninitialized @ret on root_group
    alloc failure if !CONFIG_CFQ_GROUP_IOSCHED.  Fixed.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b01c377fd739..68720ab275d4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -370,6 +370,7 @@ struct request_queue {
 
 	struct list_head icq_list;
 #ifdef CONFIG_BLK_CGROUP
+	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
 	struct blkio_group *root_blkg;
 	struct list_head blkg_list;
 #endif
--
cgit v1.2.3
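
From a policy's point of view, activation is a single opt-in call on the
queue's init path.  The sketch below is illustrative only; the policy
symbol and error-unwind labels are assumptions, blkcg_activate_policy()
is the interface described above.

	/* sketch of an elevator opting into blkcg at init time */
	static int cfq_init_queue(struct request_queue *q)
	{
		int ret;

		/* ... allocate cfqd and the fallback root group ... */

		ret = blkcg_activate_policy(q, &blkio_policy_cfq);
		if (ret)
			goto out_free;	/* activation may fail; unwind */

		/* ... rest of init ... */
		return 0;
	out_free:
		/* free cfqd etc. */
		return ret;
	}

Deactivation mirrors this from the exit path, so each queue carries
exactly the set of policies (tracked in q->blkcg_pols) it asked for.
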
From 3c798398e393e5f9502dbab2b51e6c25e2e8f2ac Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 16 Apr 2012 13:57:25 -0700
Subject: blkcg: mass rename of blkcg API

During the recent blkcg cleanup, most of the blkcg API has changed to
such an extent that mass renaming wouldn't cause any noticeable pain.
Take the chance and clean up the naming.

* Rename blkio_cgroup to blkcg.

* Drop blkio / blkiocg prefixes and consistently use blkcg.

* Rename blkio_group to blkcg_gq, which is consistent with io_cq, but
  keep the blkg prefix / variable names.

* Rename policy method type and field names to signify they're dealing
  with policy data.

* Rename blkio_policy_type to blkcg_policy.

This patch doesn't cause any functional change.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 68720ab275d4..af33fb1adfee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -31,7 +31,7 @@ struct blk_trace;
 struct request;
 struct sg_io_hdr;
 struct bsg_job;
-struct blkio_group;
+struct blkcg_gq;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -371,7 +371,7 @@ struct request_queue {
 	struct list_head icq_list;
 #ifdef CONFIG_BLK_CGROUP
 	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
-	struct blkio_group *root_blkg;
+	struct blkcg_gq *root_blkg;
 	struct list_head blkg_list;
 #endif
--
cgit v1.2.3