diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst index 19d4d1570..4969e4a6b 100644 --- a/Documentation/block/bfq-iosched.rst +++ b/Documentation/block/bfq-iosched.rst @@ -69,6 +69,7 @@ BFQ works for multi-queue devices too. 4. BFQ group scheduling 4-1 Service guarantees provided 4-2 Interface + 5. Development version of BFQ 1. When may BFQ be useful? ========================== @@ -569,12 +570,51 @@ weight of the queues associated with interactive and soft real-time applications. Unset this tunable if you need/want to control weights. +5. Development version of BFQ +============================= + +The production version of BFQ has been available in Linux since 4.12, +but only for the blk-mq version of the block layer. + +The development version of BFQ for a given kernel version differs from +the production version in that: + +- it contains commits not yet available in that kernel version; +- it contains many consistency checks to detect possible malfunctions. + +Depending on the kernel version, development versions of BFQ are available +in two different forms in this repository. + +First, up to kernels 4.19.X, both the legacy (single-queue) and the blk-mq +versions of the block layer were available, and we provided BFQ for both +versions of the block layer. So, to reduce confusion, +the branches in this repo that are based on kernels up to 4.19 contain +the following two variants of BFQ: + +- bfq-sq: development version of BFQ for legacy block +- bfq-mq: development version of BFQ for blk-mq + +IOW, up to 4.19, you find three versions of BFQ: the production version, +named bfq, plus two development versions, named bfq-sq and bfq-mq. + +Things change from kernel 5.0 onwards. The legacy block layer is +no longer available, so there can be no bfq-sq. +In addition, since the production version of bfq now lags much less behind +the development version in terms of important commits, +the development branches do not contain an additional bfq-mq scheduler, +but simply the additional commits that turn the production version into the +development version, leaving the name unchanged. +IOW, in each branch there is still only one BFQ, named bfq, +but this bfq is more powerful and more thoroughly consistency-checked than the production version. +See [4] for details. + + [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O Scheduler", Proceedings of the First Workshop on Mobile System Technologies (MST-2015), May 2015. - http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf + https://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf [2] P. Valente and M. Andreolini, "Improving Application @@ -584,7 +624,10 @@ applications. Unset this tunable if you need/want to control weights. Slightly extended version: - http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf + https://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite-results.pdf [3] https://github.com/Algodev-github/S + +[4] + https://github.com/Algodev-github/bfq-mq diff --git a/MAINTAINERS b/MAINTAINERS index 281de213e..7061939c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3172,6 +3172,13 @@ S: Maintained F: Documentation/block/bfq-iosched.rst F: block/bfq-* +BFQ-dev I/O SCHEDULER +M: Piotr Gorski +T: git git://github.com/sirlucjan/bfq-mq-lucjan.git +S: Maintained +F: block/bfq-* +F: Documentation/block/bfq-iosched.rst + BFS FILE SYSTEM M: "Tigran A.
Aivazian" S: Maintained diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 68882b9b8..2c776493a 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -328,11 +328,14 @@ static void bfqg_put(struct bfq_group *bfqg) { bfqg->ref--; - if (bfqg->ref == 0) + BFQ_BUG_ON(bfqg->ref < 0); + if (bfqg->ref == 0) { + BFQ_BUG_ON(bfqg->entity.on_st_or_in_serv); kfree(bfqg); + } } -void bfqg_and_blkg_get(struct bfq_group *bfqg) +static void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); @@ -425,6 +428,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + BFQ_BUG_ON(!bfqq); + entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { @@ -434,6 +439,9 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) * Make sure that bfqg and its associated blkg do not * disappear before entity. */ + bfq_log_bfqq(bfqq->bfqd, bfqq, "getting bfqg %p and blkg\n", + bfqg); + bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ @@ -505,12 +513,18 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) return &bgd->pd; } +static inline int bfq_dft_weight(void) +{ + return cgroup_subsys_on_dfl(io_cgrp_subsys) ? + CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL; + +} + static void bfq_cpd_init(struct blkcg_policy_data *cpd) { struct bfq_group_data *d = cpd_to_bfqgd(cpd); - d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ? - CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL; + d->weight = bfq_dft_weight(); } static void bfq_cpd_free(struct blkcg_policy_data *cpd) @@ -539,11 +553,19 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, static void bfq_pd_init(struct blkg_policy_data *pd) { - struct blkcg_gq *blkg = pd_to_blkg(pd); - struct bfq_group *bfqg = blkg_to_bfqg(blkg); - struct bfq_data *bfqd = blkg->q->elevator->elevator_data; - struct bfq_entity *entity = &bfqg->entity; - struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); + struct blkcg_gq *blkg; + struct bfq_group *bfqg; + struct bfq_data *bfqd; + struct bfq_entity *entity; + struct bfq_group_data *d; + + blkg = pd_to_blkg(pd); + BFQ_BUG_ON(!blkg); + bfqg = blkg_to_bfqg(blkg); + bfqd = blkg->q->elevator->elevator_data; + BFQ_BUG_ON(bfqg == bfqd->root_group); + entity = &bfqg->entity; + d = blkcg_to_bfqgd(blkg->blkcg); entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; @@ -554,6 +576,9 @@ static void bfq_pd_init(struct blkg_policy_data *pd) bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->rq_pos_tree = RB_ROOT; + + if (entity->new_weight != bfq_dft_weight()) + bfqd_enable_active_group_check(bfqd); } static void bfq_pd_free(struct blkg_policy_data *pd) @@ -576,6 +601,10 @@ static void bfq_group_set_parent(struct bfq_group *bfqg, { struct bfq_entity *entity; + BFQ_BUG_ON(!parent); + BFQ_BUG_ON(!bfqg); + BFQ_BUG_ON(bfqg == parent); + entity = &bfqg->entity; entity->parent = parent->my_entity; entity->sched_data = &parent->sched_data; @@ -612,10 +641,12 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, for_each_entity(entity) { struct bfq_group *curr_bfqg = container_of(entity, struct bfq_group, entity); + BFQ_BUG_ON(!curr_bfqg); if (curr_bfqg != bfqd->root_group) { parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; + BFQ_BUG_ON(!parent); bfq_group_set_parent(curr_bfqg, parent); } } @@ 
-642,6 +673,11 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + BFQ_BUG_ON(!bfq_bfqq_busy(bfqq) && !RB_EMPTY_ROOT(&bfqq->sort_list)); + BFQ_BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list) && + !entity->on_st_or_in_serv); + BFQ_BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue); + /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. @@ -658,14 +694,20 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); + BFQ_BUG_ON(entity->on_st_or_in_serv && !bfq_bfqq_busy(bfqq) + && &bfq_entity_service_tree(entity)->idle != + entity->tree); + if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); + bfqg_and_blkg_put(bfqq_group(bfqq)); entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; + /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); @@ -677,6 +719,11 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); + + BFQ_BUG_ON(entity->on_st_or_in_serv && !bfq_bfqq_busy(bfqq) + && &bfq_entity_service_tree(entity)->idle != + entity->tree); + /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } @@ -714,6 +761,10 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (entity->sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, 0); + bfq_log_bfqq(bfqd, async_bfqq, + "%p %d", + async_bfqq, + async_bfqq->ref); bfq_release_process_ref(bfqd, async_bfqq); } } @@ -835,9 +886,12 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, if (!child_entity) child_entity = child_sd->in_service_entity; + BFQ_BUG_ON(!child_entity); } bfqq = bfq_entity_to_bfqq(child_entity); + BFQ_BUG_ON(!bfqq); + BFQ_BUG_ON(!bfq_bfqq_busy(bfqq)); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } @@ -875,12 +929,20 @@ static void bfq_reparent_active_queues(struct bfq_data *bfqd, static void bfq_pd_offline(struct blkg_policy_data *pd) { struct bfq_service_tree *st; - struct bfq_group *bfqg = pd_to_bfqg(pd); - struct bfq_data *bfqd = bfqg->bfqd; - struct bfq_entity *entity = bfqg->my_entity; + struct bfq_group *bfqg; + struct bfq_data *bfqd; + struct bfq_entity *entity; unsigned long flags; int i; + BFQ_BUG_ON(!pd); + bfqg = pd_to_bfqg(pd); + BFQ_BUG_ON(!bfqg); + bfqd = bfqg->bfqd; + BFQ_BUG_ON(bfqd && !bfqd->root_group); + + entity = bfqg->my_entity; + spin_lock_irqsave(&bfqd->lock, flags); if (!entity) /* root group */ @@ -919,7 +981,11 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) * empty and gets expired. 
*/ bfq_flush_idle_tree(st); + BFQ_BUG_ON(!RB_EMPTY_ROOT(&st->active)); + BFQ_BUG_ON(!RB_EMPTY_ROOT(&st->idle)); } + BFQ_BUG_ON(bfqg->sched_data.next_in_service); + BFQ_BUG_ON(bfqg->sched_data.in_service_entity); __bfq_deactivate_entity(entity, false); @@ -942,6 +1008,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); + BFQ_BUG_ON(!bfqg); bfq_end_wr_async_queues(bfqd, bfqg); } @@ -1013,6 +1080,7 @@ static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_wei */ smp_wmb(); bfqg->entity.prio_changed = 1; + bfqd_enable_active_group_check(bfqg->bfqd); } } @@ -1398,6 +1466,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + BFQ_BUG_ON(!bfqq); + entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e667cc98c..e12fe17af 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -99,19 +99,19 @@ * [1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O * Scheduler", Proceedings of the First Workshop on Mobile System * Technologies (MST-2015), May 2015. - * http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf + * https://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf * * [2] Jon C.R. Bennett and H. Zhang, "Hierarchical Packet Fair Queueing * Algorithms", IEEE/ACM Transactions on Networking, 5(5):675-689, * Oct 1997. * - * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz + * https://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz * * [3] I. Stoica and H. Abdel-Wahab, "Earliest Eligible Virtual Deadline * First: A Flexible and Accurate Mechanism for Proportional Share * Resource Allocation", technical report. * - * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf + * https://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf */ #include <linux/module.h> #include <linux/slab.h> @@ -217,6 +217,12 @@ const int bfq_timeout = HZ / 8; */ static const unsigned long bfq_merge_time_limit = HZ/10; +#define MAX_LENGTH_REASON_NAME 25 + +static const char reason_name[][MAX_LENGTH_REASON_NAME] = {"TOO_IDLE", +"BUDGET_TIMEOUT", "BUDGET_EXHAUSTED", "NO_MORE_REQUESTS", +"PREEMPTED"}; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate.
*/ @@ -373,6 +379,11 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) { + if (bfqq && bfqq->bfqd) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "setting bfqq[%d] = %p for bic %p", + is_sync, bfqq, bic); + bic->bfqq[is_sync] = bfqq; } @@ -422,7 +433,7 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, void bfq_schedule_dispatch(struct bfq_data *bfqd) { if (bfqd->queued != 0) { - bfq_log(bfqd, "schedule dispatch"); + bfq_log(bfqd, ""); blk_mq_run_hw_queues(bfqd->queue, true); } } @@ -542,8 +553,8 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) data->shallow_depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; - bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(op), + bfq_log(bfqd, "wr_busy %d sync %d depth %u", + bfqd->wr_busy_queues, op_is_sync(op), data->shallow_depth); } @@ -562,6 +573,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, parent = *p; bfqq = rb_entry(parent, struct bfq_queue, pos_node); + BFQ_BUG_ON(bfqq && !bfqq->next_rq); /* * Sort strictly based on sector. Smallest to the left, @@ -581,8 +593,8 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, if (rb_link) *rb_link = p; - bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", - (unsigned long long)sector, + bfq_log(bfqd, "%llu: returning %d", + (unsigned long long) sector, bfqq ? bfqq->pid : 0); return bfqq; @@ -699,11 +711,24 @@ static bool bfq_asymmetric_scenario(struct bfq_data *bfqd, (bfqd->busy_queues[0] && bfqd->busy_queues[2]) || (bfqd->busy_queues[1] && bfqd->busy_queues[2]); - return varied_queue_weights || multiple_classes_busy + if (bfqq) { + bfq_log_bfqq(bfqd, bfqq, "smallest %d varied %d mul_classes %d", + smallest_weight, + !RB_EMPTY_ROOT(&bfqd->queue_weights_tree.rb_root) && + (bfqd->queue_weights_tree.rb_root.rb_node->rb_left || + bfqd->queue_weights_tree.rb_root.rb_node->rb_right), + multiple_classes_busy); + } else + bfq_log(bfqd, "varied_queue_weights %d mul_classes %d", + varied_queue_weights, multiple_classes_busy); + #ifdef CONFIG_BFQ_GROUP_IOSCHED - || bfqd->num_groups_with_pending_reqs > 0 + bfq_log(bfqd, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); #endif - ; + + return varied_queue_weights || multiple_classes_busy || + bfqd_has_active_group(bfqd); } /* @@ -785,6 +810,11 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, inc_counter: bfqq->weight_counter->num_active++; bfqq->ref++; + + bfq_log_bfqq(bfqq->bfqd, bfqq, "refs %d weight %d symmetric %d", + bfqq->ref, + entity->weight, + !bfq_asymmetric_scenario(bfqd, bfqq)); } /* @@ -797,9 +827,15 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct rb_root_cached *root) { + struct bfq_entity *entity = &bfqq->entity; + if (!bfqq->weight_counter) return; + BFQ_BUG_ON(RB_EMPTY_ROOT(&root->rb_root)); + BFQ_BUG_ON(bfqq->weight_counter->weight != entity->weight); + + BFQ_BUG_ON(!bfqq->weight_counter->num_active); bfqq->weight_counter->num_active--; if (bfqq->weight_counter->num_active > 0) goto reset_entity_pointer; @@ -809,6 +845,11 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd, reset_entity_pointer: bfqq->weight_counter = NULL; + bfq_log_bfqq(bfqq->bfqd, bfqq, + "refs %d weight %d symmetric %d", + bfqq->ref, + entity->weight, + !bfq_asymmetric_scenario(bfqd, bfqq)); bfq_put_queue(bfqq); } @@ -824,7 +865,14 @@ void 
bfq_weights_tree_remove(struct bfq_data *bfqd, for_each_entity(entity) { struct bfq_sched_data *sd = entity->my_sched_data; + BFQ_BUG_ON(entity->sched_data == NULL); /* + * It would mean + * that this is + * the root group. + */ + if (sd->next_in_service || sd->in_service_entity) { + BFQ_BUG_ON(!entity->in_groups_with_pending_reqs); /* * entity is still active, because either * next_in_service or in_service_entity is not @@ -839,6 +887,8 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, break; } + BFQ_BUG_ON(!bfqd->num_groups_with_pending_reqs && + entity->in_groups_with_pending_reqs); /* * The decrement of num_groups_with_pending_reqs is * not performed immediately upon the deactivation of @@ -853,6 +903,8 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, entity->in_groups_with_pending_reqs = false; bfqd->num_groups_with_pending_reqs--; } + bfq_log_bfqq(bfqd, bfqq, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); } /* @@ -883,7 +935,8 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq, if (rq == last || ktime_get_ns() < rq->fifo_time) return NULL; - bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq); + bfq_log_bfqq(bfqq->bfqd, bfqq, "returned %p", rq); + BFQ_BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); return rq; } @@ -895,10 +948,16 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, struct rb_node *rbprev = rb_prev(&last->rb_node); struct request *next, *prev = NULL; + BFQ_BUG_ON(list_empty(&bfqq->fifo)); + /* Follow expired path, else get first next available. */ next = bfq_check_fifo(bfqq, last); - if (next) + if (next) { + BFQ_BUG_ON(next == last); return next; + } + + BFQ_BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) prev = rb_entry_rq(rbprev); @@ -918,6 +977,9 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, static unsigned long bfq_serv_to_charge(struct request *rq, struct bfq_queue *bfqq) { + BFQ_BUG_ON(!bfqq->bfqd); + BFQ_BUG_ON(!rq); + if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 || bfq_asymmetric_scenario(bfqq->bfqd, bfqq)) return blk_rq_sectors(rq); @@ -940,6 +1002,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); struct request *next_rq = bfqq->next_rq; unsigned long new_budget; @@ -953,13 +1016,16 @@ static void bfq_updated_next_req(struct bfq_data *bfqd, */ return; + BFQ_BUG_ON(entity->tree != &st->active); + BFQ_BUG_ON(entity == entity->sched_data->in_service_entity); + new_budget = max_t(unsigned long, max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(next_rq, bfqq)), entity->service); if (entity->budget != new_budget) { entity->budget = new_budget; - bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", + bfq_log_bfqq(bfqd, bfqq, "new budget %lu", new_budget); bfq_requeue_bfqq(bfqd, bfqq, false); } @@ -1028,8 +1094,15 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, bfqq->ttime = bic->saved_ttime; bfqq->wr_coeff = bic->saved_wr_coeff; bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; + BFQ_BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt)); bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; + BFQ_BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + + bfq_log_bfqq(bfqq->bfqd, bfqq, + "bic %p wr_coeff %d start_finish %lu max_time %lu", + bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, + 
bfqq->wr_cur_max_time); if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || time_is_before_jiffies(bfqq->last_wr_start_finish + @@ -1039,10 +1112,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt + bfq_wr_duration(bfqd))) { switch_back_to_interactive_wr(bfqq, bfqd); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "switching back to interactive"); } else { bfqq->wr_coeff = 1; bfq_log_bfqq(bfqq->bfqd, bfqq, - "resume state: switching off wr"); + "switching off wr (%lu + %lu < %lu)", + bfqq->last_wr_start_finish, bfqq->wr_cur_max_time, + jiffies); } } @@ -1052,16 +1129,37 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, if (likely(!busy)) return; - if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) + if (old_wr_coeff == 1 && bfqq->wr_coeff > 1) { bfqd->wr_busy_queues++; - else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) + BFQ_BUG_ON(bfqd->wr_busy_queues > bfq_tot_busy_queues(bfqd)); + } else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1) { bfqd->wr_busy_queues--; + BFQ_BUG_ON(bfqd->wr_busy_queues < 0); + } } static int bfqq_process_refs(struct bfq_queue *bfqq) { - return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - + int process_refs, io_refs; + + lockdep_assert_held(&bfqq->bfqd->lock); + + io_refs = bfqq->allocated; + process_refs = bfqq->ref - io_refs - bfqq->entity.on_st_or_in_serv - (bfqq->weight_counter != NULL); + + if (bfqq->proc_ref > process_refs) { + pr_crit("ref %d proc_ref %d computed %d", + bfqq->ref, bfqq->proc_ref, process_refs); + pr_crit("allocated %d on_st %d weight_counter %d", + bfqq->allocated, bfqq->entity.on_st_or_in_serv, + (bfqq->weight_counter != NULL)); + + BFQ_BUG_ON(true); + } + + BFQ_BUG_ON(process_refs < 0); + return process_refs; } /* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */ @@ -1093,6 +1191,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* Increment burst size to take into account also bfqq */ bfqd->burst_size++; + bfq_log_bfqq(bfqd, bfqq, "%d", bfqd->burst_size); + + BFQ_BUG_ON(bfqd->burst_size > bfqd->bfq_large_burst_thresh); + if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { struct bfq_queue *pos, *bfqq_item; struct hlist_node *n; @@ -1102,15 +1204,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * other to consider this burst as large. */ bfqd->large_burst = true; + bfq_log_bfqq(bfqd, bfqq, "large burst started"); /* * We can now mark all queues in the burst list as * belonging to a large burst. */ hlist_for_each_entry(bfqq_item, &bfqd->burst_list, - burst_list_node) + burst_list_node) { bfq_mark_bfqq_in_large_burst(bfqq_item); + bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst"); + } bfq_mark_bfqq_in_large_burst(bfqq); + bfq_log_bfqq(bfqd, bfqq, "marked in large burst"); /* * From now on, and until the current burst finishes, any @@ -1275,6 +1381,8 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->entity.parent != bfqd->burst_parent_entity) { bfqd->large_burst = false; bfq_reset_burst_list(bfqd, bfqq); + bfq_log_bfqq(bfqd, bfqq, + "late activation or different group"); goto end; } @@ -1284,6 +1392,7 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * bfqq as belonging to this large burst immediately. 
*/ if (bfqd->large_burst) { + bfq_log_bfqq(bfqd, bfqq, "marked in burst"); bfq_mark_bfqq_in_large_burst(bfqq); goto end; } @@ -1310,6 +1419,11 @@ static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; + if (entity->budget < entity->service) { + pr_crit("budget %d service %d\n", + entity->budget, entity->service); + BUG(); + } return entity->budget - entity->service; } @@ -1474,10 +1588,13 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, * entity->budget the remaining budget on such an * expiration. */ + BFQ_BUG_ON(bfqq->max_budget < 0); entity->budget = min_t(unsigned long, bfq_bfqq_budget_left(bfqq), bfqq->max_budget); + BFQ_BUG_ON(entity->budget < 0); + /* * At this point, we have used entity->service to get * the budget left (needed for updating @@ -1496,8 +1613,11 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd, * We can finally complete expiration, by setting service to 0. */ entity->service = 0; + BFQ_BUG_ON(bfqq->max_budget < 0); entity->budget = max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(bfqq->next_rq, bfqq)); + BFQ_BUG_ON(entity->budget < 0); + bfq_clear_bfqq_non_blocking_wait_rq(bfqq); return false; } @@ -1557,13 +1677,23 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, bfqq->entity.budget = min_t(unsigned long, bfqq->entity.budget, 2 * bfq_min_budget(bfqd)); + + bfq_log_bfqq(bfqd, bfqq, + "wrais starting at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq->wr_cur_max_time)); } else if (old_wr_coeff > 1) { if (interactive) { /* update wr coeff and duration */ bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); - } else if (in_burst) + } else if (in_burst) { bfqq->wr_coeff = 1; - else if (soft_rt) { + bfq_log_bfqq(bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); + } else if (soft_rt) { /* * The application is now or still meeting the * requirements for being deemed soft rt. 
We @@ -1597,12 +1727,17 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, bfqd->bfq_wr_rt_max_time) { bfqq->wr_start_at_switch_to_srt = bfqq->last_wr_start_finish; + BFQ_BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); bfqq->wr_cur_max_time = bfqd->bfq_wr_rt_max_time; bfqq->wr_coeff = bfqd->bfq_wr_coeff * BFQ_SOFTRT_WEIGHT_FACTOR; - } + bfq_log_bfqq(bfqd, bfqq, + "switching to soft_rt wr"); + } else + bfq_log_bfqq(bfqd, bfqq, + "moving forward soft_rt wr duration"); bfqq->last_wr_start_finish = jiffies; } } @@ -1665,6 +1800,18 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfqq->ttime.last_end_request + bfqd->bfq_slice_idle * 3; + bfq_log_bfqq(bfqd, bfqq, + "bfq_add_request non-busy: " + "jiffies %lu, in_time %d, idle_long %d busyw %d " + "wr_coeff %u", + jiffies, arrived_in_time, + idle_for_long_time, + bfq_bfqq_non_blocking_wait_rq(bfqq), + old_wr_coeff); + + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); + + BFQ_BUG_ON(bfqq == bfqd->in_service_queue); /* * bfqq deserves to be weight-raised if: @@ -1685,6 +1832,15 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, (bfq_bfqq_sync(bfqq) && bfqq->bic && (*interactive || soft_rt))); + bfq_log_bfqq(bfqd, bfqq, + "bfq_add_request: " + "in_burst %d, " + "soft_rt %d (next %lu), inter %d, bic %p", + bfq_bfqq_in_large_burst(bfqq), soft_rt, + bfqq->soft_rt_next_start, + *interactive, + bfqq->bic); + /* * Using the last flag, update budget and check whether bfqq * may want to preempt the in-service queue. @@ -1726,6 +1882,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfq_mark_bfqq_IO_bound(bfqq); } else bfqq->requests_within_timer = 0; + bfq_log_bfqq(bfqd, bfqq, "requests in time %d", + bfqq->requests_within_timer); } if (bfqd->low_latency) { @@ -1754,6 +1912,19 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfq_add_bfqq_busy(bfqd, bfqq); + if (bfqd->in_service_queue) + bfq_log_bfqq(bfqd, bfqq, + "wants to preempt %d, higher %d, may preempt %d", + bfqq_wants_to_preempt, + bfq_bfqq_higher_class_or_weight(bfqq, + bfqd-> + in_service_queue), + next_queue_may_preempt(bfqd) + ); + else + bfq_log_bfqq(bfqd, bfqq, + "no queue in service"); + /* * Expire in-service queue only if preemption may be needed * for guarantees. In particular, we care only about two @@ -1790,9 +1961,14 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, ((bfqq_wants_to_preempt && bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) || bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) && - next_queue_may_preempt(bfqd)) + next_queue_may_preempt(bfqd)) { + struct bfq_queue *in_serv = + bfqd->in_service_queue; + BFQ_BUG_ON(in_serv == bfqq); + bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); + } } static void bfq_reset_inject_limit(struct bfq_data *bfqd, @@ -1859,6 +2035,8 @@ static void bfq_reset_inject_limit(struct bfq_data *bfqd, bfqq->inject_limit = 1; bfqq->decrease_time_jif = jiffies; + + bfq_log_bfqq(bfqd, bfqq, ""); } static void bfq_add_request(struct request *rq) @@ -1869,11 +2047,42 @@ static void bfq_add_request(struct request *rq) unsigned int old_wr_coeff = bfqq->wr_coeff; bool interactive = false; - bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); + bfq_log_bfqq(bfqd, bfqq, "%p size %u %s", + rq, blk_rq_sectors(rq), rq_is_sync(rq) ? 
"S" : "A"); + + if (bfqq->wr_coeff > 1) /* queue is being weight-raised */ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time), + bfqq->wr_coeff, + bfqq->entity.weight, bfqq->entity.orig_weight); + bfqq->queued[rq_is_sync(rq)]++; bfqd->queued++; + bfq_log_bfqq(bfqd, bfqq, "new in-bfqq[%d] %d, in-bfqd %d", + rq_is_sync(rq), bfqq->queued[rq_is_sync(rq)], + bfqd->queued); + + BFQ_BUG_ON(!RQ_BFQQ(rq)); + BFQ_BUG_ON(RQ_BFQQ(rq) != bfqq); + WARN_ON(blk_rq_sectors(rq) == 0); if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, + "in_serv %d reset %d recalc %d", + bfqq == bfqd->in_service_queue, + time_is_before_eq_jiffies( + bfqq->decrease_time_jif + + msecs_to_jiffies(1000)), + time_is_before_eq_jiffies( + bfqq->decrease_time_jif + + msecs_to_jiffies(100))); + + bfq_log_bfqq(bfqd, bfqq, + "limit %u rq_in_driver %d rqs_injected %d", + bfqq->inject_limit, bfqd->rq_in_driver, bfqd->rqs_injected); + /* * Detect whether bfqq's I/O seems synchronized with * that of some other queue, i.e., whether bfqq, after @@ -1929,7 +2138,6 @@ static void bfq_add_request(struct request *rq) * I/O-plugging interval for bfqq. */ if (bfqd->last_completed_rq_bfqq && - !bfq_bfqq_has_short_ttime(bfqq) && ktime_get_ns() - bfqd->last_completion < 200 * NSEC_PER_USEC) { if (bfqd->last_completed_rq_bfqq != bfqq && @@ -1969,6 +2177,9 @@ static void bfq_add_request(struct request *rq) &bfqd->last_completed_rq_bfqq->woken_list); bfq_clear_bfqq_has_waker(bfqq); + bfq_log_bfqq(bfqd, bfqq, + "tentative waker: %d", + bfqq->waker_bfqq->pid); } else if (bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq && !bfq_bfqq_has_waker(bfqq)) { @@ -1977,6 +2188,8 @@ static void bfq_add_request(struct request *rq) * seen for the second time */ bfq_mark_bfqq_has_waker(bfqq); + bfq_log_bfqq(bfqd, bfqq, "has waker set to %d", + bfqq->waker_bfqq->pid); } } @@ -2044,6 +2257,7 @@ static void bfq_add_request(struct request *rq) */ if (bfqd->rq_in_driver == 0) bfqd->rqs_injected = false; + bfq_log_bfqq(bfqd, bfqq, "start limit update"); } } @@ -2054,6 +2268,9 @@ static void bfq_add_request(struct request *rq) */ prev = bfqq->next_rq; next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); + BFQ_BUG_ON(!next_rq); + BFQ_BUG_ON(!RQ_BFQQ(next_rq)); + BFQ_BUG_ON(RQ_BFQQ(next_rq) != bfqq); bfqq->next_rq = next_rq; /* @@ -2075,7 +2292,13 @@ static void bfq_add_request(struct request *rq) bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); bfqd->wr_busy_queues++; + BFQ_BUG_ON(bfqd->wr_busy_queues > bfq_tot_busy_queues(bfqd)); bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "non-idle wrais starting, " + "wr_max_time %u wr_busy %d", + jiffies_to_msecs(bfqq->wr_cur_max_time), + bfqd->wr_busy_queues); } if (prev != bfqq->next_rq) bfq_updated_next_req(bfqd, bfqq); @@ -2118,6 +2341,7 @@ static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, { struct bfq_queue *bfqq = bfqd->bio_bfqq; + BFQ_BUG_ON(!bfqd->bio_bfqq_set); if (bfqq) return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); @@ -2145,6 +2369,7 @@ static void bfq_deactivate_request(struct request_queue *q, struct request *rq) { struct bfq_data *bfqd = q->elevator->elevator_data; + BFQ_BUG_ON(bfqd->rq_in_driver == 0); bfqd->rq_in_driver--; } #endif @@ -2156,25 +2381,49 @@ static void bfq_remove_request(struct request_queue *q, struct bfq_data *bfqd = bfqq->bfqd; const int sync = rq_is_sync(rq); + 
BFQ_BUG_ON(bfqq->entity.service > bfqq->entity.budget); + if (bfqq->next_rq == rq) { bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); + if (bfqq->next_rq && !RQ_BFQQ(bfqq->next_rq)) { + pr_crit("no bfqq! for next rq %p bfqq %p\n", + bfqq->next_rq, bfqq); + } + + BFQ_BUG_ON(bfqq->next_rq && !RQ_BFQQ(bfqq->next_rq)); + if (bfqq->next_rq && RQ_BFQQ(bfqq->next_rq) != bfqq) { + pr_crit( + "wrong bfqq! for next rq %p, rq_bfqq %p bfqq %p\n", + bfqq->next_rq, RQ_BFQQ(bfqq->next_rq), bfqq); + } + BFQ_BUG_ON(bfqq->next_rq && RQ_BFQQ(bfqq->next_rq) != bfqq); + bfq_updated_next_req(bfqd, bfqq); } if (rq->queuelist.prev != &rq->queuelist) list_del_init(&rq->queuelist); + BFQ_BUG_ON(bfqq->queued[sync] == 0); + BFQ_BUG_ON(bfqd->queued == 0); + bfqq->queued[sync]--; bfqd->queued--; + bfq_log_bfqq(bfqd, bfqq, "%p in-bfqq[%d] %d in-bfqd %d", + rq, sync, bfqq->queued[sync], bfqd->queued); elv_rb_del(&bfqq->sort_list, rq); - elv_rqhash_del(q, rq); + elv_rqhash_del(rq); if (q->last_merge == rq) q->last_merge = NULL; if (RB_EMPTY_ROOT(&bfqq->sort_list)) { bfqq->next_rq = NULL; + BFQ_BUG_ON(bfqq->entity.budget < 0); + if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) { + BFQ_BUG_ON(bfqq->ref < 2); /* referred by rq + * and on tree */ bfq_del_bfqq_busy(bfqd, bfqq, false); /* * bfqq emptied. In normal operation, when @@ -2200,14 +2449,16 @@ static void bfq_remove_request(struct request_queue *q, bfqq->pos_root = NULL; } } else { + BFQ_BUG_ON(!bfqq->next_rq); /* see comments on bfq_pos_tree_add_move() for the unlikely() */ if (unlikely(!bfqd->nonrot_with_queueing)) bfq_pos_tree_add_move(bfqd, bfqq); } - if (rq->cmd_flags & REQ_META) + if (rq->cmd_flags & REQ_META) { + BFQ_BUG_ON(bfqq->meta_pending == 0); bfqq->meta_pending--; - + } } static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, @@ -2233,11 +2484,20 @@ static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, else bfqd->bio_bfqq = NULL; bfqd->bio_bic = bic; + /* Set next flag just for testing purposes */ + bfqd->bio_bfqq_set = true; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + /* + * XXX Not yet freeing without lock held, to avoid an + * inconsistency with respect to the lock-protected invocation + * of blk_mq_sched_try_insert_merge in bfq_bio_merge. Waiting + * for clarifications from Jens. 
+ */ if (free) blk_mq_free_request(free); + bfqd->bio_bfqq_set = false; spin_unlock_irq(&bfqd->lock); return ret; @@ -2252,6 +2512,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, __rq = bfq_find_rq_fmerge(bfqd, bio, q); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + bfq_log(bfqd, "req %p", __rq); + return ELEVATOR_FRONT_MERGE; } @@ -2263,6 +2525,8 @@ static struct bfq_queue *bfq_init_rq(struct request *rq); static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { + BFQ_BUG_ON(req->rq_flags & RQF_DISP_LIST); + if (type == ELEVATOR_FRONT_MERGE && rb_prev(&req->rb_node) && blk_rq_pos(req) < @@ -2279,13 +2543,22 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, /* Reposition request in its sort_list */ elv_rb_del(&bfqq->sort_list, req); + BFQ_BUG_ON(!RQ_BFQQ(req)); + BFQ_BUG_ON(RQ_BFQQ(req) != bfqq); elv_rb_add(&bfqq->sort_list, req); /* Choose next request to be served for bfqq */ prev = bfqq->next_rq; next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, bfqd->last_position); + BFQ_BUG_ON(!next_rq); + bfqq->next_rq = next_rq; + + bfq_log_bfqq(bfqd, bfqq, + "req %p prev %p next_rq %p bfqq %p", + req, prev, next_rq, bfqq); + /* * If next_rq changes, update both the queue's budget to * fit the new request and the queue's position in its @@ -2325,6 +2598,16 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, if (!bfqq) return; + BFQ_BUG_ON(!RQ_BFQQ(rq)); + BFQ_BUG_ON(!RQ_BFQQ(next)); /* this does not imply next is in a bfqq */ + BFQ_BUG_ON(rq->rq_flags & RQF_DISP_LIST); + BFQ_BUG_ON(next->rq_flags & RQF_DISP_LIST); + + lockdep_assert_held(&bfqq->bfqd->lock); + + bfq_log_bfqq(bfqq->bfqd, bfqq, + "rq %p next %p bfqq %p next_bfqq %p", + rq, next, bfqq, next_bfqq); /* * If next and rq belong to the same bfq_queue and next is older @@ -2352,8 +2635,12 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, /* Must be called with bfqq != NULL */ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) { - if (bfq_bfqq_busy(bfqq)) + BFQ_BUG_ON(!bfqq); + + if (bfq_bfqq_busy(bfqq)) { bfqq->bfqd->wr_busy_queues--; + BFQ_BUG_ON(bfqq->bfqd->wr_busy_queues < 0); + } bfqq->wr_coeff = 1; bfqq->wr_cur_max_time = 0; bfqq->last_wr_start_finish = jiffies; @@ -2362,6 +2649,12 @@ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) * __bfq_entity_update_weight_prio. */ bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqq->bfqd, bfqq, + "wrais ending at %lu, rais_max_time %u", + bfqq->last_wr_start_finish, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + bfq_log_bfqq(bfqq->bfqd, bfqq, "wr_busy %d", + bfqq->bfqd->wr_busy_queues); } void bfq_end_wr_async_queues(struct bfq_data *bfqd, @@ -2432,6 +2725,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, * next_request position). 
*/ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); + BFQ_BUG_ON(!__bfqq->next_rq); if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) return __bfqq; @@ -2443,6 +2737,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, return NULL; __bfqq = rb_entry(node, struct bfq_queue, pos_node); + BFQ_BUG_ON(!__bfqq->next_rq); if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) return __bfqq; @@ -2531,8 +2826,12 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) { - if (bfq_too_late_for_merging(new_bfqq)) + if (bfq_too_late_for_merging(new_bfqq)) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "too late for bfq%d to be merged", + new_bfqq->pid); return false; + } if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || (bfqq->ioprio_class != new_bfqq->ioprio_class)) @@ -2634,8 +2933,11 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, * happen to do close I/O for some short time interval, have * their queues merged by mistake. */ - if (bfq_too_late_for_merging(bfqq)) + if (bfq_too_late_for_merging(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, + "would have looked for coop, but too late"); return NULL; + } if (bfqq->new_bfqq) return bfqq->new_bfqq; @@ -2667,6 +2969,8 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, bfq_io_struct_pos(io_struct, request)); + BFQ_BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); + if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && bfq_may_be_close_cooperator(bfqq, new_bfqq)) return bfq_setup_merge(bfqq, new_bfqq); @@ -2715,6 +3019,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; } + BFQ_BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "bic %p wr_coeff %d start_finish %lu max_time %lu", + bic, bfqq->wr_coeff, bfqq->last_wr_start_finish, + bfqq->wr_cur_max_time); } void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) @@ -2734,6 +3043,7 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq != bfqd->in_service_queue) bfq_del_bfqq_busy(bfqd, bfqq, false); + bfqq->proc_ref--; bfq_put_queue(bfqq); } @@ -2743,6 +3053,10 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, { bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", (unsigned long)new_bfqq->pid); + + BFQ_BUG_ON(new_bfqq == &bfqd->oom_bfqq); + + BFQ_BUG_ON(bfqq->bic && bfqq->bic == new_bfqq->bic); /* Save weight raising and idle window of the merged queues */ bfq_bfqq_save_state(bfqq); bfq_bfqq_save_state(new_bfqq); @@ -2765,19 +3079,30 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish; new_bfqq->wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; - if (bfq_bfqq_busy(new_bfqq)) + if (bfq_bfqq_busy(new_bfqq)) { bfqd->wr_busy_queues++; + BFQ_BUG_ON(bfqd->wr_busy_queues > + bfq_tot_busy_queues(bfqd)); + } + new_bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqd, new_bfqq, + "wr start after merge with %d, rais_max_time %u", + bfqq->pid, + jiffies_to_msecs(bfqq->wr_cur_max_time)); } if (bfqq->wr_coeff > 1) { /* bfqq has given its wr to new_bfqq */ bfqq->wr_coeff = 1; bfqq->entity.prio_changed = 1; - if (bfq_bfqq_busy(bfqq)) + if (bfq_bfqq_busy(bfqq)) { bfqd->wr_busy_queues--; 
+ BFQ_BUG_ON(bfqd->wr_busy_queues < 0); + } + } - bfq_log_bfqq(bfqd, new_bfqq, "merge_bfqqs: wr_busy %d", + bfq_log_bfqq(bfqd, new_bfqq, "wr_busy %d", bfqd->wr_busy_queues); /* @@ -2817,6 +3142,7 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, bool is_sync = op_is_sync(bio->bi_opf); struct bfq_queue *bfqq = bfqd->bio_bfqq, *new_bfqq; + assert_spin_locked(&bfqd->lock); /* * Disallow merge of a sync bio into an async request. */ @@ -2827,6 +3153,7 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * Lookup the bfqq that this bio will be queued with. Allow * merge only if rq is queued there. */ + BFQ_BUG_ON(!bfqd->bio_bfqq_set); if (!bfqq) return false; @@ -2835,6 +3162,9 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * of the queues of possible cooperating processes. */ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); + BFQ_BUG_ON(new_bfqq == bfqq); + BFQ_BUG_ON(new_bfqq == &bfqd->oom_bfqq); + if (new_bfqq) { /* * bic still points to bfqq, then it has not yet been @@ -2884,6 +3214,9 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd, bfqq->budget_timeout = jiffies + bfqd->bfq_timeout * timeout_coeff; + + bfq_log_bfqq(bfqd, bfqq, "%u", + jiffies_to_msecs(bfqd->bfq_timeout * timeout_coeff)); } static void __bfq_set_in_service_queue(struct bfq_data *bfqd, @@ -2894,6 +3227,8 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, bfqd->budgets_assigned = (bfqd->budgets_assigned * 7 + 256) / 8; + BFQ_BUG_ON(bfqq == bfqd->in_service_queue); + if (time_is_before_jiffies(bfqq->last_wr_start_finish) && bfqq->wr_coeff > 1 && bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && @@ -2928,15 +3263,30 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, jiffies - bfqq->budget_timeout; else bfqq->last_wr_start_finish = jiffies; + + if (time_is_after_jiffies(bfqq->last_wr_start_finish)) { + pr_crit( + "BFQ WARNING:last %lu budget %lu jiffies %lu", + bfqq->last_wr_start_finish, + bfqq->budget_timeout, + jiffies); + pr_crit("diff %lu", jiffies - + max_t(unsigned long, + bfqq->last_wr_start_finish, + bfqq->budget_timeout)); + bfqq->last_wr_start_finish = jiffies; + } } bfq_set_budget_timeout(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, - "set_in_service_queue, cur-budget = %d", - bfqq->entity.budget); - } + "cur-budget = %d prio_class %d", + bfqq->entity.budget, bfqq->ioprio_class); + } else + bfq_log(bfqd, "NULL"); bfqd->in_service_queue = bfqq; + bfqd->in_serv_last_pos = -1; } /* @@ -2947,6 +3297,7 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); __bfq_set_in_service_queue(bfqd, bfqq); + BFQ_BUG_ON(bfqq && !bfqq->entity.on_st_or_in_serv); return bfqq; } @@ -2955,6 +3306,8 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) struct bfq_queue *bfqq = bfqd->in_service_queue; u32 sl; + BFQ_BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + bfq_mark_bfqq_wait_request(bfqq); /* @@ -2985,6 +3338,8 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), HRTIMER_MODE_REL); bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); + bfq_log_bfqq(bfqd, bfqq, "arm idle: %ld/%ld ms", + sl / NSEC_PER_MSEC, bfqd->bfq_slice_idle / NSEC_PER_MSEC); } /* @@ -3010,7 +3365,9 @@ static void update_thr_responsiveness_params(struct bfq_data *bfqd) if (bfqd->bfq_user_max_budget == 0) { bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); - bfq_log(bfqd, "new max_budget = %d", 
bfqd->bfq_max_budget); + BFQ_BUG_ON(bfqd->bfq_max_budget < 0); + bfq_log(bfqd, "new max_budget = %d", + bfqd->bfq_max_budget); } } @@ -3027,7 +3384,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd, bfqd->peak_rate_samples = 0; /* full re-init on next disp. */ bfq_log(bfqd, - "reset_rate_computation at end, sample %u/%u tot_sects %llu", + "at end, sample %u/%u tot_sects %llu", bfqd->peak_rate_samples, bfqd->sequential_samples, bfqd->tot_sectors_dispatched); } @@ -3045,8 +3402,12 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) * for a new evaluation attempt. */ if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || - bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) + bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { + bfq_log(bfqd, + "only resetting, delta_first %lluus samples %d", + bfqd->delta_from_first>>10, bfqd->peak_rate_samples); goto reset_computation; + } /* * If a new request completion has occurred after last @@ -3058,6 +3419,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) max_t(u64, bfqd->delta_from_first, bfqd->last_completion - bfqd->first_dispatch); + BFQ_BUG_ON(bfqd->delta_from_first == 0); /* * Rate computed in sects/usec, and not sects/nsec, for * precision issues. @@ -3065,6 +3427,12 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) rate = div64_ul(bfqd->tot_sectors_dispatched<delta_from_first, NSEC_PER_USEC)); + bfq_log(bfqd, +"tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", + bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + rate > 20<sequential_samples < (3 * bfqd->peak_rate_samples)>>2 && rate <= bfqd->peak_rate) || - rate > 20< 20<peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); goto reset_computation; + } else { + bfq_log(bfqd, + "do update, samples %u/%u rate/peak %llu/%llu", + bfqd->peak_rate_samples, bfqd->sequential_samples, + ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + } /* * We have to update the peak rate, at last! To this purpose, @@ -3114,6 +3494,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) * maximum weight. 
*/ divisor = 10 - weight; + BFQ_BUG_ON(divisor == 0); /* * Finally, update peak rate: @@ -3124,6 +3505,15 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) bfqd->peak_rate /= divisor; rate /= divisor; /* smoothing constant alpha = 1/divisor */ + bfq_log(bfqd, + "divisor %d tmp_peak_rate %llu tmp_rate %u", + divisor, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), + (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); + + BFQ_BUG_ON(bfqd->peak_rate == 0); + BFQ_BUG_ON(bfqd->peak_rate > 20<peak_rate += rate; /* @@ -3136,6 +3526,7 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); update_thr_responsiveness_params(bfqd); + BFQ_BUG_ON(bfqd->peak_rate > 20<peak_rate_samples == 0) { /* first dispatch */ - bfq_log(bfqd, "update_peak_rate: goto reset, samples %d", - bfqd->peak_rate_samples); + bfq_log(bfqd, + "goto reset, samples %d", + bfqd->peak_rate_samples) ; bfq_reset_rate_computation(bfqd, rq); goto update_last_values; /* will add one sample */ } @@ -3197,8 +3589,13 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) * - start a new observation interval with this dispatch */ if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && - bfqd->rq_in_driver == 0) + bfqd->rq_in_driver == 0) { + bfq_log(bfqd, +"jumping to updating&resetting delta_last %lluus samples %d", + (now_ns - bfqd->last_dispatch)>>10, + bfqd->peak_rate_samples) ; goto update_rate_and_reset; + } /* Update sampling information */ bfqd->peak_rate_samples++; @@ -3219,6 +3616,12 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) bfqd->delta_from_first = now_ns - bfqd->first_dispatch; + bfq_log(bfqd, + "added samples %u/%u tot_sects %llu delta_first %lluus", + bfqd->peak_rate_samples, bfqd->sequential_samples, + bfqd->tot_sectors_dispatched, + bfqd->delta_from_first>>10); + /* Target observation interval not yet reached, go on sampling */ if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL) goto update_last_values; @@ -3230,6 +3633,14 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) if (RQ_BFQQ(rq) == bfqd->in_service_queue) bfqd->in_serv_last_pos = bfqd->last_position; bfqd->last_dispatch = now_ns; + + bfq_log(bfqd, + "delta_first %lluus last_pos %llu peak_rate %llu", + (now_ns - bfqd->first_dispatch)>>10, + (unsigned long long) bfqd->last_position, + ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); + bfq_log(bfqd, + "samples at end %d", bfqd->peak_rate_samples); } /* @@ -3446,21 +3857,36 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + bool asymmetric_scenario; + /* No point in idling for bfqq if it won't get requests any longer */ if (unlikely(!bfqq_process_refs(bfqq))) return false; - return (bfqq->wr_coeff > 1 && + asymmetric_scenario = (bfqq->wr_coeff > 1 && (bfqd->wr_busy_queues < bfq_tot_busy_queues(bfqd) || bfqd->rq_in_driver >= bfqq->dispatched + 4)) || bfq_asymmetric_scenario(bfqd, bfqq); + + bfq_log_bfqq(bfqd, bfqq, + "wr_coeff %d wr_busy %d busy %d asymmetric %d", + bfqq->wr_coeff, + bfqd->wr_busy_queues, + bfq_tot_busy_queues(bfqd), + asymmetric_scenario); + + return asymmetric_scenario; } static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, enum bfqq_expiration reason) { + BFQ_BUG_ON(bfqq != bfqd->in_service_queue); + BFQ_BUG_ON(!bfqq->entity.on_st_or_in_serv); + 
bfqq_process_refs(bfqq); // DEBUG: check process refs consistency + /* * If this bfqq is shared between multiple processes, check * to make sure that those processes are still issuing I/Os @@ -3497,6 +3923,8 @@ static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_del_bfqq_busy(bfqd, bfqq, true); } else { + BFQ_BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && + !bfqq_process_refs(bfqq)); bfq_requeue_bfqq(bfqd, bfqq, true); /* * Resort priority tree of potential close cooperators. @@ -3533,6 +3961,8 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, struct request *next_rq; int budget, min_budget; + BFQ_BUG_ON(bfqq != bfqd->in_service_queue); + min_budget = bfq_min_budget(bfqd); if (bfqq->wr_coeff == 1) @@ -3545,11 +3975,11 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, */ budget = 2 * min_budget; - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", + bfq_log_bfqq(bfqd, bfqq, "last budg %d, budg left %d", bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d", + bfq_log_bfqq(bfqd, bfqq, "last max_budg %d, min budg %d", budget, bfq_min_budget(bfqd)); - bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", + bfq_log_bfqq(bfqd, bfqq, "sync %d, seeky %d", bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); if (bfq_bfqq_sync(bfqq) && bfqq->wr_coeff == 1) { @@ -3678,9 +4108,14 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, * it will be updated on the arrival of a new request. */ next_rq = bfqq->next_rq; - if (next_rq) + if (next_rq) { + BFQ_BUG_ON(reason == BFQQE_TOO_IDLE || + reason == BFQQE_NO_MORE_REQUESTS); bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(next_rq, bfqq)); + BFQ_BUG_ON(!bfq_bfqq_busy(bfqq)); + BFQ_BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + } bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d", next_rq ? blk_rq_sectors(next_rq) : 0, @@ -3747,6 +4182,8 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, else /* charge at least one seek */ *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; + bfq_log(bfqd, "too short %u", delta_usecs); + return slow; } @@ -3768,9 +4205,11 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * peak rate. */ slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; + bfq_log(bfqd, "relative rate %d/%d", + bfqq->entity.service, bfqd->bfq_max_budget); } - bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); + bfq_log_bfqq(bfqd, bfqq, "slow %d", slow); return slow; } @@ -3871,6 +4310,13 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + bfq_log_bfqq(bfqd, bfqq, +"service_blkg %lu soft_rate %u sects/sec interval %u", + bfqq->service_from_backlogged, + bfqd->bfq_wr_max_softrt_rate, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate)); + return max3(bfqq->soft_rt_next_start, bfqq->last_idle_bklogged + HZ * bfqq->service_from_backlogged / @@ -3913,6 +4359,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, unsigned long delta = 0; struct bfq_entity *entity = &bfqq->entity; + BFQ_BUG_ON(bfqq != bfqd->in_service_queue); + /* * Check whether the process is slow (see bfq_bfqq_is_slow). 
*/ @@ -3939,6 +4387,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bfq_bfqq_budget_left(bfqq) >= entity->budget / 3))) bfq_bfqq_charge_time(bfqd, bfqq, delta); + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); + if (reason == BFQQE_TOO_IDLE && entity->service <= 2 * entity->budget / 10) bfq_clear_bfqq_IO_bound(bfqq); @@ -3974,11 +4424,14 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * of all the outstanding requests to discover whether * the request pattern is actually isochronous. */ + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) < 1); if (bfqq->dispatched == 0 && - bfqq->wr_coeff != bfqd->bfq_wr_coeff) + bfqq->wr_coeff != bfqd->bfq_wr_coeff) { bfqq->soft_rt_next_start = bfq_bfqq_softrt_next_start(bfqd, bfqq); - else if (bfqq->dispatched > 0) { + bfq_log_bfqq(bfqd, bfqq, "new soft_rt_next %lu", + bfqq->soft_rt_next_start); + } else if (bfqq->dispatched > 0) { /* * Schedule an update of soft_rt_next_start to when * the task may be discovered to be isochronous. @@ -3988,8 +4441,10 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, } bfq_log_bfqq(bfqd, bfqq, - "expire (%d, slow %d, num_disp %d, short_ttime %d)", reason, - slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq)); + "expire (%s, slow %d, num_disp %d, short %d, weight %d, serv %d/%d)", + reason_name[reason], slow, bfqq->dispatched, + bfq_bfqq_has_short_ttime(bfqq), entity->weight, + entity->service, entity->budget); /* * bfqq expired, so no total service time needs to be computed @@ -4003,7 +4458,10 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * Increase, decrease or leave budget unchanged according to * reason. */ + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); + BFQ_BUG_ON(bfqq->next_rq == NULL && + bfqq->entity.budget < bfqq->entity.service); if (__bfq_bfqq_expire(bfqd, bfqq, reason)) /* bfqq is gone, no more actions on it */ return; @@ -4012,14 +4470,18 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, if (!bfq_bfqq_busy(bfqq) && reason != BFQQE_BUDGET_TIMEOUT && reason != BFQQE_BUDGET_EXHAUSTED) { + BFQ_BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + BFQ_BUG_ON(bfqq->next_rq); bfq_mark_bfqq_non_blocking_wait_rq(bfqq); /* * Not setting service to 0, because, if the next rq * arrives in time, the queue will go on receiving * service with this same budget (as if it never expired) */ - } else + } else { entity->service = 0; + bfq_log_bfqq(bfqd, bfqq, "resetting service"); + } /* * Reset the received-service counter for every parent entity. @@ -4064,7 +4526,7 @@ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "may_budget_timeout: wait_request %d left %d timeout %d", + "wait_request %d left %d timeout %d", bfq_bfqq_wait_request(bfqq), bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, bfq_bfqq_budget_timeout(bfqq)); @@ -4116,6 +4578,11 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, ((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) && bfqq_sequential_and_IO_bound); + bfq_log_bfqq(bfqd, bfqq, "rot_no_q %d q %d seq %d boost %d", + rot_without_queueing, bfqd->hw_tag, + bfqq_sequential_and_IO_bound, + idling_boosts_thr); + /* * The return value of this function is equal to that of * idling_boosts_thr, unless a special case holds. In this @@ -4211,6 +4678,13 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) * either boosts the throughput (without issues), or is * necessary to preserve service guarantees. 
*/ + bfq_log_bfqq(bfqd, bfqq, + "wr_busy %d boosts %d IO-bound %d guar %d", + bfqd->wr_busy_queues, + idling_boosts_thr_with_no_issue, + bfq_bfqq_IO_bound(bfqq), + idling_needed_for_service_guar); + return idling_boosts_thr_with_no_issue || idling_needed_for_service_guar; } @@ -4275,7 +4749,7 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) limit = 1; if (bfqd->rq_in_driver >= limit) - return NULL; + goto no_queue; /* * Linear search of the source queue for injection; but, with @@ -4294,6 +4768,8 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) (in_serv_always_inject || bfqq->wr_coeff > 1) && bfq_serv_to_charge(bfqq->next_rq, bfqq) <= bfq_bfqq_budget_left(bfqq)) { + bfq_log_bfqq(bfqd, bfqq, "found this queue"); + /* * Allow for only one large in-flight request * on non-rotational devices, for the @@ -4318,12 +4794,22 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) else limit = in_serv_bfqq->inject_limit; + bfq_log_bfqq(bfqd, bfqq, + "rq_sect %u in_driver %d limit %u", + blk_rq_sectors(bfqq->next_rq), + bfqd->rq_in_driver, limit); + if (bfqd->rq_in_driver < limit) { + bfq_log_bfqq(bfqd, bfqq, + "returned this queue, rqs_inj set"); bfqd->rqs_injected = true; return bfqq; } } +no_queue: + bfq_log(bfqd, "no queue found: in_driver %d limit %u", + bfqd->rq_in_driver, limit); return NULL; } @@ -4341,7 +4827,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) if (!bfqq) goto new_queue; - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + bfq_log_bfqq(bfqd, bfqq, "already in-service queue"); /* * Do not expire bfqq for budget timeout if bfqq may be about @@ -4367,6 +4853,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * serve them, keep the queue, otherwise expire it. */ if (next_rq) { + BFQ_BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + if (bfq_serv_to_charge(next_rq, bfqq) > bfq_bfqq_budget_left(bfqq)) { /* @@ -4420,6 +4908,26 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) bfqq->bic->bfqq[0]->next_rq ? bfqq->bic->bfqq[0] : NULL; + bfq_log_bfqq(bfqd, bfqq, + "bic %p bfqq[0] %p busy %d", + bfqq->bic, + bfqq->bic ? bfqq->bic->bfqq[0] : NULL, + (bfqq->bic && bfqq->bic->bfqq[0]) ? 
+ bfq_bfqq_busy(bfqq->bic->bfqq[0]) : false); + + BFQ_BUG_ON(async_bfqq && !bfq_bfqq_sync(bfqq)); + + if (async_bfqq) + bfq_log_bfqq(bfqd, bfqq, + "bic ok %d serv_to_charge %lu, budg_left %d", + icq_to_bic(async_bfqq->next_rq->elv.icq) + == bfqq->bic, + bfq_serv_to_charge(async_bfqq->next_rq, + async_bfqq), + bfq_bfqq_budget_left(async_bfqq) + ); + BFQ_BUG_ON(bfqq->waker_bfqq == bfqq); + /* * The next three mutually-exclusive ifs decide * whether to try injection, and choose the queue to @@ -4495,23 +5003,47 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) if (async_bfqq && icq_to_bic(async_bfqq->next_rq->elv.icq) == bfqq->bic && bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <= - bfq_bfqq_budget_left(async_bfqq)) + bfq_bfqq_budget_left(async_bfqq)) { + bfq_log_bfqq(bfqd, bfqq, + "choosing directly the async queue %d", + bfqq->bic->bfqq[0]->pid); + BUG_ON(bfqq->bic->bfqq[0] == bfqq); bfqq = bfqq->bic->bfqq[0]; - else if (bfq_bfqq_has_waker(bfqq) && + bfq_log_bfqq(bfqd, bfqq, + "chosen directly this async queue"); + } else if (bfq_bfqq_has_waker(bfqq) && bfq_bfqq_busy(bfqq->waker_bfqq) && - bfqq->next_rq && + bfqq->waker_bfqq->next_rq && bfq_serv_to_charge(bfqq->waker_bfqq->next_rq, bfqq->waker_bfqq) <= bfq_bfqq_budget_left(bfqq->waker_bfqq) - ) + ) { + bfq_log_bfqq(bfqd, bfqq, + "choosing directly the waker queue %d", + bfqq->waker_bfqq->pid); + BUG_ON(bfqq->waker_bfqq == bfqq); bfqq = bfqq->waker_bfqq; - else if (!idling_boosts_thr_without_issues(bfqd, bfqq) && + bfq_log_bfqq(bfqd, bfqq, + "chosen directly this waker queue"); + } else if (!idling_boosts_thr_without_issues(bfqd, bfqq) && (bfqq->wr_coeff == 1 || bfqd->wr_busy_queues > 1 || - !bfq_bfqq_has_short_ttime(bfqq))) - bfqq = bfq_choose_bfqq_for_injection(bfqd); - else + !bfq_bfqq_has_short_ttime(bfqq))) { + struct bfq_queue *new_bfqq; + + bfq_log_bfqq(bfqd, bfqq, + "looking inject wr_busy %d long_tt %d", + bfqd->wr_busy_queues, + !bfq_bfqq_has_short_ttime(bfqq)); + new_bfqq = bfq_choose_bfqq_for_injection(bfqd); + BUG_ON(new_bfqq == bfqq); + if (new_bfqq) + bfq_log_bfqq(bfqd, bfqq, + "chosen the queue %d for injection", + new_bfqq->pid); + bfqq = new_bfqq; + } else { bfqq = NULL; - + } goto keep_queue; } @@ -4521,14 +5053,14 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) new_queue: bfqq = bfq_set_in_service_queue(bfqd); if (bfqq) { - bfq_log_bfqq(bfqd, bfqq, "select_queue: checking new queue"); + bfq_log_bfqq(bfqd, bfqq, "checking new queue"); goto check_queue; } keep_queue: if (bfqq) - bfq_log_bfqq(bfqd, bfqq, "select_queue: returned this queue"); + bfq_log_bfqq(bfqd, bfqq, "returned this queue"); else - bfq_log(bfqd, "select_queue: no queue returned"); + bfq_log(bfqd, "no queue returned"); return bfqq; } @@ -4538,6 +5070,9 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) struct bfq_entity *entity = &bfqq->entity; if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ + BFQ_BUG_ON(bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && + time_is_after_jiffies(bfqq->last_wr_start_finish)); + bfq_log_bfqq(bfqd, bfqq, "raising period dur %u/%u msec, old coeff %u, w %d(%d)", jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), @@ -4545,6 +5080,8 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->wr_coeff, bfqq->entity.weight, bfqq->entity.orig_weight); + BFQ_BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != + entity->orig_weight * bfqq->wr_coeff); if (entity->prio_changed) bfq_log_bfqq(bfqd, bfqq, "WARN: 
pending prio change"); @@ -4563,7 +5100,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_bfqq_end_wr(bfqq); else { switch_back_to_interactive_wr(bfqq, bfqd); + BFQ_BUG_ON(time_is_after_jiffies( + bfqq->last_wr_start_finish)); bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqd, bfqq, + "back to interactive wr"); } } if (bfqq->wr_coeff > 1 && @@ -4571,6 +5112,10 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->service_from_wr > max_service_from_wr) { /* see comments on max_service_from_wr */ bfq_bfqq_end_wr(bfqq); + bfq_log_bfqq(bfqd, bfqq, + "too much service %lu > %lu", + bfqq->service_from_wr, + max_service_from_wr); } } /* @@ -4595,17 +5140,32 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd, struct request *rq = bfqq->next_rq; unsigned long service_to_charge; + BFQ_BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + BFQ_BUG_ON(!rq); service_to_charge = bfq_serv_to_charge(rq, bfqq); + BFQ_BUG_ON(service_to_charge > bfq_bfqq_budget_left(bfqq)); + + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); + bfq_bfqq_served(bfqq, service_to_charge); + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); + if (bfqq == bfqd->in_service_queue && bfqd->wait_dispatch) { + bfq_log_bfqq(bfqd, bfqq, "set waited_rq to %p", rq); bfqd->wait_dispatch = false; bfqd->waited_rq = rq; } - bfq_dispatch_remove(bfqd->queue, rq); + bfq_log_bfqq(bfqd, bfqq, + "dispatched %u sec req (%llu), budg left %d, new disp_nr %d", + blk_rq_sectors(rq), + (unsigned long long) blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq), + bfqq->dispatched); + if (bfqq != bfqd->in_service_queue) goto return_rq; @@ -4640,6 +5200,13 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + if (!atomic_read(&hctx->elevator_queued)) + return false; + + bfq_log(bfqd, "dispatch_non_empty %d busy_queues %d", + !list_empty_careful(&bfqd->dispatch), + bfq_tot_busy_queues(bfqd) > 0); + /* * Avoiding lock: a race on bfqd->busy_queues should cause at * most a call to dispatch for nothing @@ -4658,7 +5225,10 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) rq = list_first_entry(&bfqd->dispatch, struct request, queuelist); list_del_init(&rq->queuelist); + rq->rq_flags &= ~RQF_DISP_LIST; + bfq_log(bfqd, + "picked %p from dispatch list", rq); bfqq = RQ_BFQQ(rq); if (bfqq) { @@ -4670,6 +5240,17 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) */ bfqq->dispatched++; + /* + * TESTING: reset DISP_LIST flag, because: 1) + * this rq this request has passed through + * bfq_prepare_request, 2) then it will have + * bfq_finish_requeue_request invoked on it, and 3) in + * bfq_finish_requeue_request we use this flag to check + * that bfq_finish_requeue_request is not invoked on + * requests for which bfq_prepare_request has + * been invoked. 
+ */ + rq->rq_flags &= ~RQF_DISP_LIST; goto inc_in_driver_start_rq; } @@ -4699,8 +5280,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) goto start_rq; } - bfq_log(bfqd, "dispatch requests: %d busy queues", - bfq_tot_busy_queues(bfqd)); + bfq_log(bfqd, "%d busy queues", bfq_tot_busy_queues(bfqd)); if (bfq_tot_busy_queues(bfqd) == 0) goto exit; @@ -4724,14 +5304,36 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) if (!bfqq) goto exit; + BFQ_BUG_ON(bfqq == bfqd->in_service_queue && + bfqq->entity.budget < bfqq->entity.service); + + BFQ_BUG_ON(bfqq == bfqd->in_service_queue && + bfq_bfqq_wait_request(bfqq)); + rq = bfq_dispatch_rq_from_bfqq(bfqd, bfqq); + BFQ_BUG_ON(bfqq->entity.budget < bfqq->entity.service); + if (rq) { inc_in_driver_start_rq: bfqd->rq_in_driver++; start_rq: rq->rq_flags |= RQF_STARTED; - } + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, + "%s request %p (%u), rq_in_driver %d", + bfq_bfqq_sync(bfqq) ? "sync" : "async", + rq, blk_rq_sectors(rq), + bfqd->rq_in_driver); + else + bfq_log(bfqd, + "request %p from dispatch list, rq_in_driver %d", + rq, bfqd->rq_in_driver); + } else + bfq_log(bfqd, + "returned NULL request, rq_in_driver %d", + bfqd->rq_in_driver); + exit: return rq; } @@ -4805,6 +5407,8 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) idle_timer_disabled = waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) <= 0 && bfqd->queued > 0); + spin_unlock_irq(&bfqd->lock); bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, @@ -4826,14 +5430,26 @@ void bfq_put_queue(struct bfq_queue *bfqq) struct hlist_node *n; struct bfq_group *bfqg = bfqq_group(bfqq); + assert_spin_locked(&bfqq->bfqd->lock); + + BFQ_BUG_ON(bfqq->ref <= 0); + if (bfqq->bfqd) - bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", - bfqq, bfqq->ref); + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); bfqq->ref--; + bfqq_process_refs(bfqq); // DEBUG: check process ref consistency + if (bfqq->ref) return; + BFQ_BUG_ON(rb_first(&bfqq->sort_list)); + BFQ_BUG_ON(bfqq->allocated != 0); + BFQ_BUG_ON(bfqq->entity.tree); + BFQ_BUG_ON(bfq_bfqq_busy(bfqq)); + BFQ_BUG_ON(bfqq->entity.on_st_or_in_serv); + BFQ_BUG_ON(bfqq->weight_counter != NULL); + if (!hlist_unhashed(&bfqq->burst_list_node)) { hlist_del_init(&bfqq->burst_list_node); /* @@ -4896,8 +5512,14 @@ void bfq_put_queue(struct bfq_queue *bfqq) if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq) bfqq->bfqd->last_completed_rq_bfqq = NULL; - kmem_cache_free(bfq_pool, bfqq); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + bfq_log_bfqq(bfqq->bfqd, bfqq, "putting blkg and bfqg %p\n", bfqg); +#endif bfqg_and_blkg_put(bfqg); + if (bfqq->bfqd) + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p freed", bfqq); + + kmem_cache_free(bfq_pool, bfqq); } static void bfq_put_cooperator(struct bfq_queue *bfqq) @@ -4926,7 +5548,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_schedule_dispatch(bfqd); } - bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "%p, %d", bfqq, bfqq->ref); bfq_put_cooperator(bfqq); @@ -4948,6 +5570,7 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) bfqq->bic = NULL; bfq_exit_bfqq(bfqd, bfqq); bic_set_bfqq(bic, NULL, is_sync); + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) <= 0 && bfqd->queued > 0); spin_unlock_irqrestore(&bfqd->lock, flags); } } @@ -4956,6 +5579,7 @@ static void bfq_exit_icq(struct io_cq *icq) { struct bfq_io_cq *bic = icq_to_bic(icq); + 
BFQ_BUG_ON(!bic); bfq_exit_icq_bfqq(bic, true); bfq_exit_icq_bfqq(bic, false); } @@ -4964,13 +5588,14 @@ static void bfq_exit_icq(struct io_cq *icq) * Update the entity prio values; note that the new values will not * be used until the next (re)activation. */ -static void -bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, + struct bfq_io_cq *bic) { struct task_struct *tsk = current; int ioprio_class; struct bfq_data *bfqd = bfqq->bfqd; + WARN_ON(!bfqd); if (!bfqd) return; @@ -5036,6 +5661,9 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfq_release_process_ref(bfqd, bfqq); bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); bic_set_bfqq(bic, bfqq, false); + bfq_log_bfqq(bfqd, bfqq, + "bfqq %p %d", + bfqq, bfqq->ref); } bfqq = bic_to_bfqq(bic, true); @@ -5051,6 +5679,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, INIT_HLIST_NODE(&bfqq->burst_list_node); INIT_HLIST_NODE(&bfqq->woken_list_node); INIT_HLIST_HEAD(&bfqq->woken_list); + BFQ_BUG_ON(!hlist_unhashed(&bfqq->burst_list_node)); bfqq->ref = 0; bfqq->bfqd = bfqd; @@ -5118,7 +5747,7 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, case IOPRIO_CLASS_IDLE: return &bfqg->async_idle_bfqq; default: - return NULL; + BUG(); } } @@ -5175,14 +5804,15 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, * guarantee that this queue is not freed * until its group goes away. */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", + bfq_log_bfqq(bfqd, bfqq, "bfqq not in async: %p, %d", bfqq, bfqq->ref); *async_bfqq = bfqq; } out: + bfqq->proc_ref++; /* get a process reference to this queue */ bfqq->ref++; /* get a process reference to this queue */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "at end: %p, %d", bfqq, bfqq->ref); rcu_read_unlock(); return bfqq; } @@ -5191,11 +5821,19 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_ttime *ttime = &bfqq->ttime; - u64 elapsed = ktime_get_ns() - bfqq->ttime.last_end_request; + u64 elapsed; + /* + * We are really interested in how long it takes for the queue to + * become busy when there is no outstanding IO for this queue. So + * ignore cases when the bfq queue has already IO queued. 
+ */ + if (bfqq->dispatched || bfq_bfqq_busy(bfqq)) + return; + elapsed = ktime_get_ns() - bfqq->ttime.last_end_request; elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle); - ttime->ttime_samples = (7*bfqq->ttime.ttime_samples + 256) / 8; + ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, ttime->ttime_samples); @@ -5207,6 +5845,13 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, { bfqq->seek_history <<= 1; bfqq->seek_history |= BFQ_RQ_SEEKY(bfqd, bfqq->last_request_pos, rq); + bfq_log_bfqq(bfqd, bfqq, + "rq %p, distant %d, small %d, hist %x (%u), tot_seeky %d", + rq, get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR, + blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT, + bfqq->seek_history, + hweight32(bfqq->seek_history), + BFQQ_TOTALLY_SEEKY(bfqq)); if (bfqq->wr_coeff > 1 && bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && @@ -5245,6 +5890,9 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, state_changed = has_short_ttime != bfq_bfqq_has_short_ttime(bfqq); + bfq_log_bfqq(bfqd, bfqq, "has_short_ttime %d, changed %d", + has_short_ttime, state_changed); + if (has_short_ttime) bfq_mark_bfqq_has_short_ttime(bfqq); else @@ -5407,14 +6055,26 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); bool waiting, idle_timer_disabled = false; + BFQ_BUG_ON(!bfqq); + BFQ_BUG_ON(new_bfqq == &bfqd->oom_bfqq); + + assert_spin_locked(&bfqd->lock); + bfq_log_bfqq(bfqd, bfqq, "rq %p bfqq %p", rq, bfqq); if (new_bfqq) { + BFQ_BUG_ON(bic_to_bfqq(RQ_BIC(rq), 1) != bfqq); /* * Release the request's reference to the old bfqq * and make sure one is taken to the shared queue. 
*/ new_bfqq->allocated++; bfqq->allocated--; + bfq_log_bfqq(bfqd, bfqq, + "new allocated %d", bfqq->allocated); + bfq_log_bfqq(bfqd, new_bfqq, + "new_bfqq new allocated %d", + bfqq->allocated); + new_bfqq->ref++; /* * If the bic associated with the process @@ -5442,6 +6102,10 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bfq_update_has_short_ttime(bfqd, bfqq, RQ_BIC(rq)); bfq_update_io_seektime(bfqd, bfqq, rq); + bfq_log_bfqq(bfqd, bfqq, + "has_short_ttime=%d (seeky %d)", + bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq)); + waiting = bfqq && bfq_bfqq_wait_request(bfqq); bfq_add_request(rq); idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq); @@ -5500,6 +6164,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqg_stats_update_legacy_io(q, rq); #endif spin_lock_irq(&bfqd->lock); + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) <= 0 && bfqd->queued > 0); + if (blk_mq_sched_try_insert_merge(q, rq)) { spin_unlock_irq(&bfqd->lock); return; @@ -5511,12 +6177,29 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); + BFQ_BUG_ON(!bfqq && !(at_head || blk_rq_is_passthrough(rq))); + BFQ_BUG_ON(bfqq && bic_to_bfqq(RQ_BIC(rq), rq_is_sync(rq)) != bfqq); + if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else list_add_tail(&rq->queuelist, &bfqd->dispatch); + + rq->rq_flags |= RQF_DISP_LIST; + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, + "%p in disp: at_head %d", + rq, at_head); + else + bfq_log(bfqd, + "%p in disp: at_head %d", + rq, at_head); } else { + BFQ_BUG_ON(!bfqq); + BFQ_BUG_ON(!(rq->rq_flags & RQF_GOT)); + rq->rq_flags &= ~RQF_GOT; + idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred @@ -5539,6 +6222,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, */ cmd_flags = rq->cmd_flags; + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) <= 0 && bfqd->queued > 0); + spin_unlock_irq(&bfqd->lock); bfq_update_insert_stats(q, bfqq, idle_timer_disabled, @@ -5554,6 +6239,7 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); bfq_insert_request(hctx, rq, at_head); + atomic_inc(&hctx->elevator_queued); } } @@ -5605,10 +6291,19 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_update_hw_tag(bfqd); + BFQ_BUG_ON(!bfqd->rq_in_driver); + BFQ_BUG_ON(!bfqq->dispatched); bfqd->rq_in_driver--; + bfqq->dispatched--; + bfq_log_bfqq(bfqd, bfqq, + "in_serv %d, new disp %d, new rq_in_driver %d", + bfqq == bfqd->in_service_queue, + bfqq->dispatched, bfqd->rq_in_driver); + if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { + BFQ_BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); /* * Set budget_timeout (which we overload to store the * time at which the queue remains with no backlog and @@ -5622,6 +6317,9 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) now_ns = ktime_get_ns(); + bfq_log_bfqq(bfqd, bfqq, "rq completion time: %llu us", + div_u64(now_ns - bfqd->last_dispatch, NSEC_PER_USEC)); + bfqq->ttime.last_end_request = now_ns; /* @@ -5630,6 +6328,17 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) */ delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC); + bfq_log_bfqq(bfqd, bfqq, + "delta %uus/%luus max_size %u rate %llu/%llu", + delta_us, 
BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size, + delta_us > 0 ? + (USEC_PER_SEC* + (u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us)) + >>BFQ_RATE_SHIFT : + (USEC_PER_SEC* + (u64)(bfqd->last_rq_max_size<<BFQ_RATE_SHIFT))>>BFQ_RATE_SHIFT, + (USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT); + /* * If the request took rather long to complete, and, according * to the maximum request size recorded, this completion latency @@ -5718,6 +6427,9 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "allocated %d", bfqq->allocated); + BFQ_BUG_ON(!bfqq->allocated); bfqq->allocated--; bfq_put_queue(bfqq); @@ -5833,6 +6545,12 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd, u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns; unsigned int old_limit = bfqq->inject_limit; + bfq_log_bfqq(bfqd, bfqq, + "tot_time_ns %llu last_serv_time_ns %llu old limit %u", + tot_time_ns, bfqq->last_serv_time_ns, old_limit); + + bfq_log_bfqq(bfqd, bfqq, "rq_in_driver %d", bfqd->rq_in_driver); + if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) { u64 threshold = (bfqq->last_serv_time_ns * 3)>>1; @@ -5842,7 +6560,15 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd, } else if (tot_time_ns < threshold && old_limit <= bfqd->max_rq_in_driver) bfqq->inject_limit++; + + bfq_log_bfqq(bfqd, bfqq, + "tot_time_ns %llu last_serv_time_ns %llu new limit %u", + tot_time_ns, bfqq->last_serv_time_ns, + bfqq->inject_limit); } + BUG_ON(bfqq->last_serv_time_ns == 0 && old_limit > 1); + + BUG_ON(bfqd->rq_in_driver < 1); /* * Either we still have to compute the base value for the @@ -5892,30 +6618,42 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd, */ static void bfq_finish_requeue_request(struct request *rq) { - struct bfq_queue *bfqq = RQ_BFQQ(rq); + struct bfq_queue *bfqq; struct bfq_data *bfqd; + struct bfq_io_cq *bic; - /* - * Requeue and finish hooks are invoked in blk-mq without - * checking whether the involved request is actually still - * referenced in the scheduler. To handle this fact, the - * following two checks make this function exit in case of - * spurious invocations, for which there is nothing to do. - * - * First, check whether rq has nothing to do with an elevator. - */ - if (unlikely(!(rq->rq_flags & RQF_ELVPRIV))) - return; + BFQ_BUG_ON(!rq); + + bfqq = RQ_BFQQ(rq); /* * rq either is not associated with any icq, or is an already * requeued request that has not (yet) been re-inserted into * a bfq_queue. 
*/ - if (!rq->elv.icq || !bfqq) + if (!rq->elv.icq || !bfqq) { +#ifndef CONFIG_BFQ_MQ_NOLOG_BUG_ON + trace_printk("bfq_finish_requeue_request exiting %p %p %p", + rq, rq->elv.icq, bfqq); +#endif return; + } + + bic = RQ_BIC(rq); + BFQ_BUG_ON(!bic); bfqd = bfqq->bfqd; + BFQ_BUG_ON(!bfqd); + + if (rq->rq_flags & RQF_DISP_LIST) { + pr_crit("putting disp rq %p for %d", rq, bfqq->pid); + BUG(); + } + + bfq_log_bfqq(bfqd, bfqq, + "putting rq %p with %u sects left, STARTED %d", + rq, blk_rq_sectors(rq), + rq->rq_flags & RQF_STARTED); if (rq->rq_flags & RQF_STARTED) bfqg_stats_update_completion(bfqq_group(bfqq), @@ -5923,6 +6661,8 @@ static void bfq_finish_requeue_request(struct request *rq) rq->io_start_time_ns, rq->cmd_flags); + WARN_ON(blk_rq_sectors(rq) == 0 && !(rq->rq_flags & RQF_STARTED)); + if (likely(rq->rq_flags & RQF_STARTED)) { unsigned long flags; @@ -5933,6 +6673,9 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); bfq_finish_requeue_request_body(bfqq); + atomic_dec(&rq->mq_hctx->elevator_queued); + + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) <= 0 && bfqd->queued > 0); spin_unlock_irqrestore(&bfqd->lock, flags); } else { @@ -5948,7 +6691,9 @@ static void bfq_finish_requeue_request(struct request *rq) * current version of the code, this implies that the * lock is held. */ + BFQ_BUG_ON(in_interrupt()); + assert_spin_locked(&bfqd->lock); if (!RB_EMPTY_NODE(&rq->rb_node)) { bfq_remove_request(rq->q, rq); bfqg_stats_update_io_remove(bfqq_group(bfqq), @@ -6021,13 +6766,28 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, if (bfqq) bfq_put_queue(bfqq); bfqq = bfq_get_queue(bfqd, bio, is_sync, bic); + BFQ_BUG_ON(!hlist_unhashed(&bfqq->burst_list_node)); bic_set_bfqq(bic, bfqq, is_sync); if (split && is_sync) { + bfq_log_bfqq(bfqd, bfqq, + "get_request: was_in_list %d " + "was_in_large_burst %d " + "large burst in progress %d", + bic->was_in_burst_list, + bic->saved_in_large_burst, + bfqd->large_burst); + if ((bic->was_in_burst_list && bfqd->large_burst) || - bic->saved_in_large_burst) + bic->saved_in_large_burst) { + bfq_log_bfqq(bfqd, bfqq, + "get_request: marking in " + "large burst"); bfq_mark_bfqq_in_large_burst(bfqq); - else { + } else { + bfq_log_bfqq(bfqd, bfqq, + "get_request: clearing in " + "large burst"); bfq_clear_bfqq_in_large_burst(bfqq); if (bic->was_in_burst_list) /* @@ -6127,8 +6887,10 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) * events, a request cannot be manipulated any longer before * being removed from bfq. */ - if (rq->elv.priv[1]) + if (rq->elv.priv[1]) { + BFQ_BUG_ON(!(rq->rq_flags & RQF_ELVPRIV)); return rq->elv.priv[1]; + } bic = icq_to_bic(rq->elv.icq); @@ -6142,6 +6904,8 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) if (likely(!new_queue)) { /* If the queue was seeky for too long, break it apart. 
*/ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { + BFQ_BUG_ON(bfqq == &bfqd->oom_bfqq); + BFQ_BUG_ON(!is_sync); bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); /* Update bic before losing reference to bfqq */ @@ -6157,16 +6921,21 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) NULL); else bfqq_already_existing = true; + + BFQ_BUG_ON(!bfqq); } } bfqq->allocated++; + bfq_log_bfqq(bfqq->bfqd, bfqq, + "new allocated %d", bfqq->allocated); + bfqq->ref++; - bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", - rq, bfqq, bfqq->ref); + bfq_log_bfqq(bfqd, bfqq, "%p: bfqq %p, %d", rq, bfqq, bfqq->ref); rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; + rq->rq_flags &= ~RQF_DISP_LIST; /* * If a bfq_queue has only one process reference, it is owned @@ -6212,23 +6981,36 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfq_tot_busy_queues(bfqd) == 0))) bfq_handle_burst(bfqd, bfqq); + rq->rq_flags |= RQF_GOT; + return bfqq; } -static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) +static void +bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_data *bfqd = bfqq->bfqd; enum bfqq_expiration reason; unsigned long flags; + BFQ_BUG_ON(!bfqd); spin_lock_irqsave(&bfqd->lock, flags); - bfq_clear_bfqq_wait_request(bfqq); + bfq_log_bfqq(bfqd, bfqq, "handling slice_timer expiration"); + + /* + * Considering that bfqq may be in race, we should firstly check + * whether bfqq is in service before doing something on it. If + * the bfqq in race is not in service, it has already been expired + * through __bfq_bfqq_expire func and its wait_request flags has + * been cleared in __bfq_bfqd_reset_in_service func. + */ if (bfqq != bfqd->in_service_queue) { spin_unlock_irqrestore(&bfqd->lock, flags); return; } + bfq_clear_bfqq_wait_request(bfqq); + if (bfq_bfqq_budget_timeout(bfqq)) /* * Also here the queue can be safely expired @@ -6264,6 +7046,8 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) idle_slice_timer); struct bfq_queue *bfqq = bfqd->in_service_queue; + bfq_log(bfqd, "expired"); + /* * Theoretical race here: the in-service queue can be NULL or * different from the queue that was idling if a new request @@ -6273,7 +7057,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * early. */ if (bfqq) - bfq_idle_slice_timer_body(bfqq); + bfq_idle_slice_timer_body(bfqd, bfqq); return HRTIMER_NORESTART; } @@ -6283,11 +7067,11 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, { struct bfq_queue *bfqq = *bfqq_ptr; - bfq_log(bfqd, "put_async_bfqq: %p", bfqq); + bfq_log(bfqd, "%p", bfqq); if (bfqq) { bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); - bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", + bfq_log_bfqq(bfqd, bfqq, "putting %p, %d", bfqq, bfqq->ref); bfq_put_queue(bfqq); *bfqq_ptr = NULL; @@ -6323,21 +7107,15 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, /* * In-word depths if no bfq_queue is being weight-raised: * leaving 25% of tags only for sync reads. - * - * In next formulas, right-shift the value - * (1U<sb.shift), instead of computing directly - * (1U<<(bt->sb.shift - something)), to be robust against - * any possible value of bt->sb.shift, without having to - * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. 
async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6347,9 +7125,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) @@ -6379,8 +7157,13 @@ static void bfq_exit_queue(struct elevator_queue *e) struct bfq_data *bfqd = e->elevator_data; struct bfq_queue *bfqq, *n; + bfq_log(bfqd, "starting ..."); + hrtimer_cancel(&bfqd->idle_slice_timer); + BFQ_BUG_ON(bfqd->in_service_queue); + BFQ_BUG_ON(!list_empty(&bfqd->active_list)); + spin_lock_irq(&bfqd->lock); list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) bfq_deactivate_bfqq(bfqd, bfqq, false, false); @@ -6388,6 +7171,8 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); + BFQ_BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); + /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); @@ -6400,6 +7185,7 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + bfq_log(bfqd, "finished ..."); kfree(bfqd); } @@ -6445,6 +7231,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) * will not attempt to free it. */ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0); + bfqd->oom_bfqq.proc_ref++; bfqd->oom_bfqq.ref++; bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO; bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE; @@ -6471,6 +7258,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->queue_weights_tree = RB_ROOT_CACHED; bfqd->num_groups_with_pending_reqs = 0; + bfqd->check_active_group = false; INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->idle_list); @@ -6580,6 +7368,58 @@ static int bfq_var_store(unsigned long *var, const char *page) return 0; } +static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; + + return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : + jiffies_to_msecs(bfq_wr_duration(bfqd))); +} + +static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) +{ + struct bfq_queue *bfqq; + struct bfq_data *bfqd = e->elevator_data; + ssize_t num_char = 0; + + num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", + bfqd->queued); + + spin_lock_irq(&bfqd->lock); + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, nr_queued %d %d, ", + bfqq->pid, + bfqq->entity.weight, + bfqq->queued[0], + bfqq->queued[1]); + num_char += sprintf(page + num_char, + "dur %d/%u\n", + jiffies_to_msecs( + jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { + num_char += sprintf(page + num_char, + "pid%d: weight %hu, dur %d/%u\n", + bfqq->pid, + bfqq->entity.weight, + jiffies_to_msecs(jiffies - + bfqq->last_wr_start_finish), + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + spin_unlock_irq(&bfqd->lock); + + return num_char; +} + #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ static ssize_t __FUNC(struct elevator_queue *e, char *page) \ { \ @@ -6600,6 +7440,12 @@ SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1); SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0); SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); +SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); +SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); +SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1); +SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, + 1); +SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); #undef SHOW_FUNCTION #define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ @@ -6644,6 +7490,16 @@ STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, INT_MAX, 0); STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2); +STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); +STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, + 1); +STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0, + INT_MAX, 1); +STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, + &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); +STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, + INT_MAX, 0); #undef STORE_FUNCTION #define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ @@ -6667,6 +7523,13 @@ USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0, UINT_MAX); #undef USEC_STORE_FUNCTION +/* do nothing for the moment */ +static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +{ + return count; +} + static ssize_t bfq_max_budget_store(struct elevator_queue *e, const char *page, size_t count) { @@ -6774,6 +7637,13 @@ static struct elv_fs_entry bfq_attrs[] = { BFQ_ATTR(timeout_sync), BFQ_ATTR(strict_guarantees), BFQ_ATTR(low_latency), + BFQ_ATTR(wr_coeff), + BFQ_ATTR(wr_max_time), + BFQ_ATTR(wr_rt_max_time), + BFQ_ATTR(wr_min_idle_time), + 
BFQ_ATTR(wr_min_inter_arr_async), + BFQ_ATTR(wr_max_softrt_rate), + BFQ_ATTR(weights), __ATTR_NULL }; @@ -6811,6 +7681,7 @@ MODULE_ALIAS("bfq-iosched"); static int __init bfq_init(void) { int ret; + char msg[60] = "BFQ-dev I/O-scheduler: v14"; #ifdef CONFIG_BFQ_GROUP_IOSCHED ret = blkcg_policy_register(&blkcg_policy_bfq); @@ -6842,6 +7713,11 @@ static int __init bfq_init(void) if (ret) goto slab_kill; +#ifdef CONFIG_BFQ_GROUP_IOSCHED + strcat(msg, " (with cgroups support)"); +#endif + pr_info("%s", msg); + return 0; slab_kill: diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index cd224aaf9..bc4897c2a 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -230,6 +230,8 @@ struct bfq_ttime { struct bfq_queue { /* reference counter */ int ref; + /* DEBUG: process ref counter */ + int proc_ref; /* parent bfq_data */ struct bfq_data *bfqd; @@ -524,6 +526,8 @@ struct bfq_data { /* true if the device is non rotational and performs queueing */ bool nonrot_with_queueing; + /* true if need to check num_groups_with_pending_reqs */ + bool check_active_group; /* * Maximum number of requests in driver in the last @@ -736,6 +740,8 @@ struct bfq_data { struct bfq_io_cq *bio_bic; /* bfqq associated with the task issuing current bio for merging */ struct bfq_queue *bio_bfqq; + /* Extra flag used only for TESTING */ + bool bio_bfqq_set; /* * Depth limits used in bfq_limit_depth (see comments on the @@ -986,7 +992,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); -void bfqg_and_blkg_get(struct bfq_group *bfqg); void bfqg_and_blkg_put(struct bfq_group *bfqg); #ifdef CONFIG_BFQ_GROUP_IOSCHED @@ -1004,6 +1009,17 @@ extern struct blkcg_policy blkcg_policy_bfq; #define for_each_entity(entity) \ for (; entity ; entity = entity->parent) +static inline void bfqd_enable_active_group_check(struct bfq_data *bfqd) +{ + cmpxchg_relaxed(&bfqd->check_active_group, false, true); +} + +static inline bool bfqd_has_active_group(struct bfq_data *bfqd) +{ + return bfqd->check_active_group && + bfqd->num_groups_with_pending_reqs > 0; +} + /* * For each iteration, compute parent in advance, so as to be safe if * entity is deallocated during the iteration. Such a deallocation may @@ -1066,41 +1082,129 @@ static inline void bfq_pid_to_str(int pid, char *str, int len) snprintf(str, len, "SHARED-"); } +#ifdef CONFIG_BFQ_REDIRECT_TO_CONSOLE + +static const char *checked_dev_name(const struct device *dev) +{ + static const char nodev[] = "nodev"; + + if (dev) + return dev_name(dev); + + return nodev; +} + #ifdef CONFIG_BFQ_GROUP_IOSCHED -struct bfq_group *bfqq_group(struct bfq_queue *bfqq); -#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ - char pid_str[MAX_PID_STR_LENGTH]; \ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ break; \ - bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ - blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \ - "bfq%s%c " fmt, pid_str, \ - bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \ + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ + pr_crit("%s bfq%s%c %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + pid_str, \ + bfq_bfqq_sync((bfqq)) ? 
'S' : 'A', \ + bfqq_group(bfqq)->blkg_path, __func__, ##args); \ } while (0) -#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ - blk_add_cgroup_trace_msg((bfqd)->queue, \ - bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \ +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ + pr_crit("%s %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + bfqg->blkg_path, __func__, ##args); \ } while (0) #else /* CONFIG_BFQ_GROUP_IOSCHED */ +static inline void bfqd_enable_active_group_check(struct bfq_data *bfqd) {} + +static inline bool bfqd_has_active_group(struct bfq_data *bfqd) +{ + return false; +} -#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ - char pid_str[MAX_PID_STR_LENGTH]; \ +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ break; \ - bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ - blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \ - bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ - ##args); \ -} while (0) -#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ + pr_crit("%s bfq%s%c %s [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + pid_str, bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ + __func__, ##args); \ + } while (0) +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) + +#endif /* CONFIG_BFQ_GROUP_IOSCHED */ +#define bfq_log(bfqd, fmt, args...) \ + pr_crit("%s bfq [%s] " fmt "\n", \ + checked_dev_name((bfqd)->queue->backing_dev_info->dev), \ + __func__, ##args) + +#else /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + +#if defined(CONFIG_BFQ_MQ_NOLOG_BUG_ON) || !defined(CONFIG_BLK_DEV_IO_TRACE) + +/* Avoid possible "unused-variable" warning. See commit message. */ + +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) ((void) (bfqq)) + +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) ((void) (bfqg)) + +#define bfq_log(bfqd, fmt, args...) do {} while (0) + +#else /* CONFIG_BLK_DEV_IO_TRACE */ + +#include + +#ifdef CONFIG_BFQ_GROUP_IOSCHED + +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ + if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ + break; \ + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ + blk_add_trace_msg((bfqd)->queue, "bfq%s%c %s [%s] " fmt, \ + pid_str, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ + bfqq_group(bfqq)->blkg_path, __func__, ##args); \ + } while (0) + +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \ + if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ + break; \ + blk_add_trace_msg((bfqd)->queue, "%s [%s] " fmt, bfqg->blkg_path, \ + __func__, ##args); \ + } while (0) + +#else /* CONFIG_BFQ_GROUP_IOSCHED */ + +#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \ + char pid_str[MAX_PID_STR_LENGTH]; \ + if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ + break; \ + bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \ + blk_add_trace_msg((bfqd)->queue, "bfq%s%c [%s] " fmt, pid_str, \ + bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \ + __func__, ##args); \ + } while (0) +#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0) #endif /* CONFIG_BFQ_GROUP_IOSCHED */ -#define bfq_log(bfqd, fmt, args...) \ - blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) +#define bfq_log(bfqd, fmt, args...) 
do { \ + if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \ + break; \ + blk_add_trace_msg((bfqd)->queue, "bfq [%s] " fmt, __func__, ##args); \ + } while (0) + +#endif /* CONFIG_BLK_DEV_IO_TRACE */ +#endif /* CONFIG_BFQ_REDIRECT_TO_CONSOLE */ + +#if defined(CONFIG_BFQ_MQ_NOLOG_BUG_ON) +/* Avoid possible "unused-variable" warning. */ +#define BFQ_BUG_ON(cond) ((void) (cond)) +#else +#define BFQ_BUG_ON(cond) BUG_ON(cond) +#endif #endif /* _BFQ_H */ diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index eb0e2a6da..1076bb98e 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -76,6 +76,7 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, bool expiration) { struct bfq_entity *next_in_service = sd->next_in_service; + struct bfq_queue *bfqq; bool parent_sched_may_change = false; bool change_without_lookup = false; @@ -120,8 +121,24 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, new_entity->finish)); } - if (change_without_lookup) + if (change_without_lookup) { next_in_service = new_entity; + bfqq = bfq_entity_to_bfqq(next_in_service); + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "chose without lookup"); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(next_in_service, + struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data*)bfqg->bfqd, bfqg, + "chose without lookup"); + } +#endif + } } if (!change_without_lookup) /* lookup needed */ @@ -140,6 +157,20 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd, if (!next_in_service) return parent_sched_may_change; + bfqq = bfq_entity_to_bfqq(next_in_service); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "chosen this queue"); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(next_in_service, + struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "chosen this entity"); + } +#endif return parent_sched_may_change; } @@ -166,6 +197,8 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) struct bfq_sched_data *group_sd; bool ret = false; + BFQ_BUG_ON(!next_in_service); + group_sd = next_in_service->sched_data; bfqg = container_of(group_sd, struct bfq_group, sched_data); @@ -178,6 +211,9 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) if (bfqg_entity) { if (bfqg_entity->budget > next_in_service->budget) ret = true; + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "old budg: %d, new budg: %d", + bfqg_entity->budget, next_in_service->budget); bfqg_entity->budget = next_in_service->budget; } @@ -213,6 +249,8 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) bfqg = container_of(entity, struct bfq_group, entity); + BFQ_BUG_ON(bfqg == ((struct bfq_data *)(bfqg->bfqd))->root_group); + BFQ_BUG_ON(bfqg->active_entities == 0); /* * The field active_entities does not always contain the * actual number of active children entities: it happens to @@ -263,6 +301,8 @@ struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) { struct bfq_queue *bfqq = NULL; + BFQ_BUG_ON(!entity); + if (!entity->my_sched_data) bfqq = container_of(entity, struct bfq_queue, entity); @@ -288,18 +328,36 @@ static u64 bfq_delta(unsigned long service, unsigned long weight) static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + unsigned long long start, finish, delta; + + BFQ_BUG_ON(entity->weight == 0); entity->finish = entity->start + bfq_delta(service, 
entity->weight); + start = ((entity->start>>10)*1000)>>12; + finish = ((entity->finish>>10)*1000)>>12; + delta = ((bfq_delta(service, entity->weight)>>10)*1000)>>12; + if (bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: serv %lu, w %d", + "serv %lu, w %d", service, entity->weight); bfq_log_bfqq(bfqq->bfqd, bfqq, - "calc_finish: start %llu, finish %llu, delta %llu", - entity->start, entity->finish, - bfq_delta(service, entity->weight)); + "start %llu, finish %llu, delta %llu", + start, finish, delta); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + } else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "group: serv %lu, w %d", + service, entity->weight); + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "group: start %llu, finish %llu, delta %llu", + start, finish, delta); +#endif } } @@ -329,6 +387,8 @@ struct bfq_entity *bfq_entity_of(struct rb_node *node) */ static void bfq_extract(struct rb_root *root, struct bfq_entity *entity) { + BFQ_BUG_ON(entity->tree != root); + entity->tree = NULL; rb_erase(&entity->rb_node, root); } @@ -344,6 +404,8 @@ static void bfq_idle_extract(struct bfq_service_tree *st, struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); struct rb_node *next; + BFQ_BUG_ON(entity->tree != &st->idle); + if (entity == st->first_idle) { next = rb_next(&entity->rb_node); st->first_idle = bfq_entity_of(next); @@ -374,6 +436,8 @@ static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) struct rb_node **node = &root->rb_node; struct rb_node *parent = NULL; + BFQ_BUG_ON(entity->tree); + while (*node) { parent = *node; entry = rb_entry(parent, struct bfq_entity, rb_node); @@ -422,10 +486,26 @@ static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node) static void bfq_update_active_node(struct rb_node *node) { struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->min_start = entity->start; bfq_update_min(entity, node->rb_right); bfq_update_min(entity, node->rb_left); + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "new min_start %llu", + ((entity->min_start>>10)*1000)>>12); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + } else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "new min_start %llu", + ((entity->min_start>>10)*1000)>>12); +#endif + } } /** @@ -492,13 +572,17 @@ static void bfq_active_insert(struct bfq_service_tree *st, #ifdef CONFIG_BFQ_GROUP_IOSCHED sd = entity->sched_data; bfqg = container_of(sd, struct bfq_group, sched_data); + BFQ_BUG_ON(!bfqg); bfqd = (struct bfq_data *)bfqg->bfqd; #endif if (bfqq) list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); #ifdef CONFIG_BFQ_GROUP_IOSCHED - if (bfqg != bfqd->root_group) + if (bfqg != bfqd->root_group) { + BFQ_BUG_ON(!bfqg); + BFQ_BUG_ON(!bfqd); bfqg->active_entities++; + } #endif } @@ -508,6 +592,7 @@ static void bfq_active_insert(struct bfq_service_tree *st, */ unsigned short bfq_ioprio_to_weight(int ioprio) { + BFQ_BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF; } @@ -521,6 +606,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio) */ static unsigned short bfq_weight_to_ioprio(int weight) { + BFQ_BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); return max_t(int, 0, IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight); } @@ -531,11 +617,9 @@ static void bfq_get_entity(struct 
bfq_entity *entity) if (bfqq) { bfqq->ref++; - bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", + bfq_log_bfqq(bfqq->bfqd, bfqq, "%p %d", bfqq, bfqq->ref); - } else - bfqg_and_blkg_get(container_of(entity, struct bfq_group, - entity)); + } } /** @@ -593,13 +677,18 @@ static void bfq_active_extract(struct bfq_service_tree *st, #ifdef CONFIG_BFQ_GROUP_IOSCHED sd = entity->sched_data; bfqg = container_of(sd, struct bfq_group, sched_data); + BFQ_BUG_ON(!bfqg); bfqd = (struct bfq_data *)bfqg->bfqd; #endif if (bfqq) list_del(&bfqq->bfqq_list); #ifdef CONFIG_BFQ_GROUP_IOSCHED - if (bfqg != bfqd->root_group) + if (bfqg != bfqd->root_group) { + BFQ_BUG_ON(!bfqg); + BFQ_BUG_ON(!bfqd); + BFQ_BUG_ON(!bfqg->active_entities); bfqg->active_entities--; + } #endif } @@ -646,17 +735,15 @@ static void bfq_forget_entity(struct bfq_service_tree *st, bool is_in_service) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + BFQ_BUG_ON(!entity->on_st_or_in_serv); entity->on_st_or_in_serv = false; st->wsum -= entity->weight; - if (is_in_service) - return; - - if (bfqq) + if (bfqq && !is_in_service) { + bfq_log_bfqq(bfqq->bfqd, bfqq, "(before): %p %d", + bfqq, bfqq->ref); bfq_put_queue(bfqq); - else - bfqg_and_blkg_put(container_of(entity, struct bfq_group, - entity)); + } } /** @@ -699,8 +786,26 @@ static void bfq_forget_idle(struct bfq_service_tree *st) struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity) { struct bfq_sched_data *sched_data = entity->sched_data; + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); unsigned int idx = bfq_class_idx(entity); + BFQ_BUG_ON(idx >= BFQ_IOPRIO_CLASSES); + BFQ_BUG_ON(sched_data == NULL); + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "%p %d", + sched_data->service_tree + idx, idx); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "%p %d", + sched_data->service_tree + idx, idx); + } +#endif return sched_data->service_tree + idx; } @@ -745,12 +850,17 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, else { sd = entity->my_sched_data; bfqg = container_of(sd, struct bfq_group, sched_data); + BFQ_BUG_ON(!bfqg); bfqd = (struct bfq_data *)bfqg->bfqd; + BFQ_BUG_ON(!bfqd); } #endif /* Matches the smp_wmb() in bfq_group_set_weight. 
*/ smp_rmb(); + + BFQ_BUG_ON(entity->tree && update_class_too); + BFQ_BUG_ON(old_st->wsum < entity->weight); old_st->wsum -= entity->weight; if (entity->new_weight != entity->orig_weight) { @@ -812,8 +922,10 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, new_st->wsum += entity->weight; - if (new_st != old_st) + if (new_st != old_st) { + BFQ_BUG_ON(!update_class_too); entity->start = new_st->vtime; + } } return new_st; @@ -837,8 +949,11 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) if (!bfqq->service_from_backlogged) bfqq->first_IO_time = jiffies; - if (bfqq->wr_coeff > 1) + if (bfqq->wr_coeff > 1) { bfqq->service_from_wr += served; + bfq_log_bfqq(bfqq->bfqd, bfqq, "serv_from_wr: %lu", + bfqq->service_from_wr); + } bfqq->service_from_backlogged += served; for_each_entity(entity) { @@ -846,10 +961,14 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) entity->service += served; + BFQ_BUG_ON(st->wsum == 0); + st->vtime += bfq_delta(served, st->wsum); bfq_forget_idle(st); } - bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served); + st = bfq_entity_service_tree(&bfqq->entity); + bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs, vtime %llu on %p", + served, ((st->vtime>>10)*1000)>>12, st); } /** @@ -889,6 +1008,14 @@ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, (bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms; int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "%lu/%lu ms, %d/%d/%d/%d sectors", + time_ms, timeout_ms, + entity->service, + tot_serv_to_charge, + bfqd->bfq_max_budget, + entity->budget); + /* Increase budget to avoid inconsistencies */ if (tot_serv_to_charge > entity->budget) entity->budget = tot_serv_to_charge; @@ -902,12 +1029,14 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, bool backshifted) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_sched_data *sd = entity->sched_data; /* * When this function is invoked, entity is not in any service * tree, then it is safe to invoke next function with the last * parameter set (see the comments on the function). */ + BFQ_BUG_ON(entity->tree); st = __bfq_entity_update_weight_prio(st, entity, true); bfq_calc_finish(entity, entity->budget); @@ -952,9 +1081,43 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, entity->start += delta; entity->finish += delta; + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "new queue finish %llu", + ((entity->finish>>10)*1000)>>12); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + } else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "new group finish %llu", + ((entity->finish>>10)*1000)>>12); +#endif + } } bfq_active_insert(st, entity); + + if (bfqq) { + bfq_log_bfqq(bfqq->bfqd, bfqq, + "queue %seligible in st %p", + entity->start <= st->vtime ? "" : "non ", st); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + } else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "group %seligible in st %p", + entity->start <= st->vtime ? 
"" : "non ", st); +#endif + } + BFQ_BUG_ON(RB_EMPTY_ROOT(&st->active)); + BFQ_BUG_ON(&st->active != &sd->service_tree->active && + &st->active != &(sd->service_tree+1)->active && + &st->active != &(sd->service_tree+2)->active); } /** @@ -972,10 +1135,15 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, static void __bfq_activate_entity(struct bfq_entity *entity, bool non_blocking_wait_rq) { + struct bfq_sched_data *sd = entity->sched_data; struct bfq_service_tree *st = bfq_entity_service_tree(entity); + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); bool backshifted = false; unsigned long long min_vstart; + BFQ_BUG_ON(!sd); + BFQ_BUG_ON(!st); + /* See comments on bfq_fqq_update_budg_for_activation */ if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) { backshifted = true; @@ -989,9 +1157,11 @@ static void __bfq_activate_entity(struct bfq_entity *entity, * check for that. */ bfq_idle_extract(st, entity); + BFQ_BUG_ON(entity->tree); entity->start = bfq_gt(min_vstart, entity->finish) ? min_vstart : entity->finish; } else { + BFQ_BUG_ON(entity->tree); /* * The finish time of the entity may be invalid, and * it is in the past for sure, otherwise the queue @@ -1007,6 +1177,21 @@ static void __bfq_activate_entity(struct bfq_entity *entity, */ bfq_get_entity(entity); + BFQ_BUG_ON(entity->on_st_or_in_serv && bfqq); + +#ifdef CONFIG_BFQ_GROUP_IOSCHED + if (entity->on_st_or_in_serv && !bfqq) { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, + entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, + bfqg, + "activate bug, class %d in_service %p", + bfq_class_idx(entity), sd->in_service_entity); + } +#endif + BFQ_BUG_ON(entity->on_st_or_in_serv && !bfqq); entity->on_st_or_in_serv = true; } @@ -1016,10 +1201,13 @@ static void __bfq_activate_entity(struct bfq_entity *entity, container_of(entity, struct bfq_group, entity); struct bfq_data *bfqd = bfqg->bfqd; + BFQ_BUG_ON(!bfqd); if (!entity->in_groups_with_pending_reqs) { entity->in_groups_with_pending_reqs = true; bfqd->num_groups_with_pending_reqs++; } + bfq_log_bfqg(bfqd, bfqg, "num_groups_with_pending_reqs %u", + bfqd->num_groups_with_pending_reqs); } #endif @@ -1046,6 +1234,12 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) struct bfq_sched_data *sd = entity->sched_data; struct bfq_service_tree *st = bfq_entity_service_tree(entity); + BFQ_BUG_ON(!sd); + BFQ_BUG_ON(!st); + + BFQ_BUG_ON(entity != sd->in_service_entity && + entity->tree != &st->active); + if (entity == sd->in_service_entity) { /* * We are requeueing the current in-service entity, @@ -1071,6 +1265,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) */ bfq_calc_finish(entity, entity->service); entity->start = entity->finish; + BFQ_BUG_ON(entity->tree && entity->tree == &st->idle); + BFQ_BUG_ON(entity->tree && entity->tree != &st->active); /* * In addition, if the entity had more than one child * when set in service, then it was not extracted from @@ -1148,12 +1344,20 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, struct bfq_sched_data *sd; for_each_entity(entity) { + BFQ_BUG_ON(!entity); sd = entity->sched_data; __bfq_activate_requeue_entity(entity, sd, non_blocking_wait_rq); + BFQ_BUG_ON(RB_EMPTY_ROOT(&sd->service_tree->active) && + RB_EMPTY_ROOT(&(sd->service_tree+1)->active) && + RB_EMPTY_ROOT(&(sd->service_tree+2)->active)); + if (!bfq_update_next_in_service(sd, entity, expiration) && - !requeue) + !requeue) { + BFQ_BUG_ON(!sd->next_in_service); break; + } + 
BFQ_BUG_ON(!sd->next_in_service); } } @@ -1173,11 +1377,11 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) struct bfq_service_tree *st; bool is_in_service; - if (!entity->on_st_or_in_serv) /* - * entity never activated, or - * already inactive - */ + if (!entity->on_st_or_in_serv) { + /* entity never activated, or already inactive */ + BFQ_BUG_ON(sd && entity == sd->in_service_entity); return false; + } /* * If we get here, then entity is active, which implies that @@ -1188,6 +1392,8 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; + BFQ_BUG_ON(is_in_service && entity->tree && entity->tree != &st->active); + bfq_calc_finish(entity, entity->service); if (is_in_service) @@ -1204,6 +1410,8 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) bfq_active_extract(st, entity); else if (!is_in_service && entity->tree == &st->idle) bfq_idle_extract(st, entity); + else if (entity->tree) + BUG(); if (!ins_into_idle_tree || !bfq_gt(entity->finish, st->vtime)) bfq_forget_entity(st, entity, is_in_service); @@ -1230,6 +1438,18 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, for_each_entity_safe(entity, parent) { sd = entity->sched_data; + BFQ_BUG_ON(sd == NULL); /* + * It would mean that this is the + * root group. + */ + + BFQ_BUG_ON(expiration && entity != sd->in_service_entity); + + BFQ_BUG_ON(entity != sd->in_service_entity && + entity->tree == + &bfq_entity_service_tree(entity)->active && + !sd->next_in_service); + if (!__bfq_deactivate_entity(entity, ins_into_idle_tree)) { /* * entity is not in any tree any more, so @@ -1238,6 +1458,10 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, * (in case of expiration, this can never * happen). */ + BFQ_BUG_ON(expiration); /* + * entity cannot be already out of + * any tree + */ return; } @@ -1270,6 +1494,8 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, * next_in_service. See the comments on the * definition of next_in_service for details. 
*/ + BFQ_BUG_ON(sd->next_in_service == entity); + BFQ_BUG_ON(sd->in_service_entity == entity); break; } @@ -1297,6 +1523,8 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, */ entity = parent; for_each_entity(entity) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + /* * Invoke __bfq_requeue_entity on entity, even if * already active, to requeue/reposition it in the @@ -1306,6 +1534,21 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, __bfq_requeue_entity(entity); sd = entity->sched_data; + BFQ_BUG_ON(expiration && sd->in_service_entity != entity); + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "invoking update_next for this queue"); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entity, + struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "invoking update_next for this entity"); + } +#endif if (!bfq_update_next_in_service(sd, entity, expiration) && !expiration) /* @@ -1329,9 +1572,26 @@ static u64 bfq_calc_vtime_jump(struct bfq_service_tree *st) { struct bfq_entity *root_entity = bfq_root_active_entity(&st->active); - if (bfq_gt(root_entity->min_start, st->vtime)) - return root_entity->min_start; + if (bfq_gt(root_entity->min_start, st->vtime)) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(root_entity); + + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "new value %llu", + ((root_entity->min_start>>10)*1000)>>12); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(root_entity, struct bfq_group, + entity); + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "new value %llu", + ((root_entity->min_start>>10)*1000)>>12); + } +#endif + return root_entity->min_start; + } return st->vtime; } @@ -1367,6 +1627,8 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, if (!bfq_gt(entry->start, vtime)) first = entry; + BFQ_BUG_ON(bfq_gt(entry->min_start, vtime)); + if (node->rb_left) { entry = rb_entry(node->rb_left, struct bfq_entity, rb_node); @@ -1380,6 +1642,7 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st, node = node->rb_right; } + BFQ_BUG_ON(!first && !RB_EMPTY_ROOT(&st->active)); return first; } @@ -1410,6 +1673,7 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) { struct bfq_entity *entity; u64 new_vtime; + struct bfq_queue *bfqq; if (RB_EMPTY_ROOT(&st->active)) return NULL; @@ -1434,6 +1698,29 @@ __bfq_lookup_next_entity(struct bfq_service_tree *st, bool in_service) bfq_update_vtime(st, new_vtime); entity = bfq_first_active_entity(st, new_vtime); + BFQ_BUG_ON(bfq_gt(entity->start, new_vtime)); + + /* Log some information */ + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "start %llu vtime %llu st %p", + ((entity->start>>10)*1000)>>12, + ((new_vtime>>10)*1000)>>12, st); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "start %llu vtime %llu (%llu) st %p", + ((entity->start>>10)*1000)>>12, + ((st->vtime>>10)*1000)>>12, + ((new_vtime>>10)*1000)>>12, st); + } +#endif + + BFQ_BUG_ON(!entity); return entity; } @@ -1453,8 +1740,11 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, struct bfq_service_tree *st = sd->service_tree; struct bfq_service_tree *idle_class_st = st + (BFQ_IOPRIO_CLASSES - 1); struct bfq_entity *entity = NULL; + struct bfq_queue *bfqq; int class_idx = 0; +
BFQ_BUG_ON(!sd); + BFQ_BUG_ON(!st); /* * Choose from idle class, if needed to guarantee a minimum * bandwidth to this class (and if there is some active entity @@ -1498,9 +1788,29 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, break; } + BFQ_BUG_ON(!entity && + (!RB_EMPTY_ROOT(&st->active) || !RB_EMPTY_ROOT(&(st+1)->active) || + !RB_EMPTY_ROOT(&(st+2)->active))); + if (!entity) return NULL; + /* Log some information */ + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, "chosen from st %p %d", + st + class_idx, class_idx); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "chosen from st %p %d", + st + class_idx, class_idx); + } +#endif + return entity; } @@ -1520,6 +1830,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) struct bfq_sched_data *sd; struct bfq_queue *bfqq; + BFQ_BUG_ON(bfqd->in_service_queue); + if (bfq_tot_busy_queues(bfqd) == 0) return NULL; @@ -1530,6 +1842,25 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) */ sd = &bfqd->root_group->sched_data; for (; sd ; sd = entity->my_sched_data) { +#ifdef CONFIG_BFQ_GROUP_IOSCHED + if (entity) { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg(bfqd, bfqg, + "lookup in this group"); + if (!sd->next_in_service) + pr_crit("lookup in this group"); + } else { + bfq_log_bfqg(bfqd, bfqd->root_group, + "lookup in root group"); + if (!sd->next_in_service) + pr_crit("lookup in root group"); + } +#endif + + BFQ_BUG_ON(!sd->next_in_service); + /* * WARNING. We are about to set the in-service entity * to sd->next_in_service, i.e., to the (cached) value @@ -1588,9 +1919,29 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) * whether the parent entity of the leaf entity * becomes the next-to-serve, and so on. 
*/ + + /* Log some information */ + bfqq = bfq_entity_to_bfqq(entity); + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, + "this queue, finish %llu", + (((entity->finish>>10)*1000)>>10)>>2); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = + container_of(entity, struct bfq_group, entity); + + bfq_log_bfqg(bfqd, bfqg, + "this entity, finish %llu", + (((entity->finish>>10)*1000)>>10)>>2); + } +#endif + } + BFQ_BUG_ON(!entity); bfqq = bfq_entity_to_bfqq(entity); + BFQ_BUG_ON(!bfqq); /* * We can finally update all next-to-serve entities along the @@ -1658,6 +2009,11 @@ void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; + struct bfq_service_tree *st = bfq_entity_service_tree(entity); + + BFQ_BUG_ON(bfqq == bfqd->in_service_queue); + BFQ_BUG_ON(entity->tree != &st->active && entity->tree != &st->idle && + entity->on_st_or_in_serv); bfq_activate_requeue_entity(entity, bfq_bfqq_non_blocking_wait_rq(bfqq), false, false); @@ -1681,17 +2037,24 @@ void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool expiration) { + BFQ_BUG_ON(!bfq_bfqq_busy(bfqq)); + BFQ_BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + bfq_log_bfqq(bfqd, bfqq, "del from busy"); bfq_clear_bfqq_busy(bfqq); + BFQ_BUG_ON(bfq_tot_busy_queues(bfqd) == 0); bfqd->busy_queues[bfqq->ioprio_class - 1]--; - if (bfqq->wr_coeff > 1) + if (bfqq->wr_coeff > 1) { bfqd->wr_busy_queues--; + BFQ_BUG_ON(bfqd->wr_busy_queues < 0); + } bfqg_stats_update_dequeue(bfqq_group(bfqq)); + BFQ_BUG_ON(bfqq->entity.budget < 0); bfq_deactivate_bfqq(bfqd, bfqq, true, expiration); if (!bfqq->dispatched) @@ -1703,6 +2066,9 @@ void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, */ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + BFQ_BUG_ON(bfq_bfqq_busy(bfqq)); + BFQ_BUG_ON(bfqq == bfqd->in_service_queue); + bfq_log_bfqq(bfqd, bfqq, "add to busy"); bfq_activate_bfqq(bfqd, bfqq); @@ -1715,6 +2081,9 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_weights_tree_add(bfqd, bfqq, &bfqd->queue_weights_tree); - if (bfqq->wr_coeff > 1) + if (bfqq->wr_coeff > 1) { bfqd->wr_busy_queues++; + BFQ_BUG_ON(bfqd->wr_busy_queues > bfq_tot_busy_queues(bfqd)); + } + } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index fe62e7ce0..0476360f0 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -63,7 +63,7 @@ static inline void blk_mq_sched_requeue_request(struct request *rq) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e && e->type->ops.requeue_request) + if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) e->type->ops.requeue_request(rq); } diff --git a/block/blk-mq.c b/block/blk-mq.c index fc5a7bdb4..2a1eff60c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2665,6 +2665,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, goto free_hctx; atomic_set(&hctx->nr_active, 0); + atomic_set(&hctx->elevator_queued, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; diff --git a/block/elevator.c b/block/elevator.c index 293c5c813..3dd67b507 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -203,7 +203,7 @@ static inline void __elv_rqhash_del(struct request *rq) rq->rq_flags &= ~RQF_HASHED; } -void elv_rqhash_del(struct request_queue *q, struct request *rq) +void 
elv_rqhash_del(struct request *rq) { if (ELV_ON_HASH(rq)) __elv_rqhash_del(rq); @@ -417,7 +417,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) struct elevator_queue *e = q->elevator; if (e->type->ops.next_request) - return e->type->ops.next_request(q, rq); + return e->type->ops.next_request(rq); return NULL; } @@ -427,7 +427,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) struct elevator_queue *e = q->elevator; if (e->type->ops.former_request) - return e->type->ops.former_request(q, rq); + return e->type->ops.former_request(rq); return NULL; } @@ -616,15 +616,15 @@ static inline bool elv_support_iosched(struct request_queue *q) } /* - * For single queue devices, default to using mq-deadline. If we have multiple - * queues or mq-deadline is not available, default to "none". + * For single queue devices, default to using bfq. If we have multiple + * queues or bfq is not available, default to "none". */ static struct elevator_type *elevator_get_default(struct request_queue *q) { if (q->nr_hw_queues != 1) return NULL; - return elevator_get(q, "mq-deadline", false); + return elevator_get(q, "bfq", false); } /* @@ -802,8 +802,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) return len; } -struct request *elv_rb_former_request(struct request_queue *q, - struct request *rq) +struct request *elv_rb_former_request(struct request *rq) { struct rb_node *rbprev = rb_prev(&rq->rb_node); @@ -814,8 +813,7 @@ struct request *elv_rb_former_request(struct request_queue *q, } EXPORT_SYMBOL(elv_rb_former_request); -struct request *elv_rb_latter_request(struct request_queue *q, - struct request *rq) +struct request *elv_rb_latter_request(struct request *rq) { struct rb_node *rbnext = rb_next(&rq->rb_node); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index b57470e15..a8152187f 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -118,7 +118,7 @@ static void deadline_remove_request(struct request_queue *q, struct request *rq) if (!RB_EMPTY_NODE(&rq->rb_node)) deadline_del_rq_rb(dd, rq); - elv_rqhash_del(q, rq); + elv_rqhash_del(rq); if (q->last_merge == rq) q->last_merge = NULL; } @@ -386,6 +386,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) spin_lock(&dd->lock); rq = __dd_dispatch_request(dd); spin_unlock(&dd->lock); + if (rq) + atomic_dec(&rq->mq_hctx->elevator_queued); return rq; } @@ -533,6 +535,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); dd_insert_request(hctx, rq, at_head); + atomic_inc(&hctx->elevator_queued); } spin_unlock(&dd->lock); } @@ -579,6 +582,9 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; + if (!atomic_read(&hctx->elevator_queued)) + return false; + return !list_empty_careful(&dd->dispatch) || !list_empty_careful(&dd->fifo_list[0]) || !list_empty_careful(&dd->fifo_list[1]); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1626eb27b..f8ea27423 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -139,6 +139,10 @@ struct blk_mq_hw_ctx { * shared across request queues. */ atomic_t nr_active; + /** + * @elevator_queued: Number of queued requests on hctx. 
+ */ + atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 542471b76..f152c53f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -104,6 +104,10 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) +/* DEBUG: rq in bfq-dev dispatch list */ +#define RQF_DISP_LIST ((__force req_flags_t)(1 << 22)) +/* DEBUG: rq had get_rq_private executed on it */ +#define RQF_GOT ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index bacc40a0b..3821cc1de 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -46,8 +46,8 @@ struct elevator_mq_ops { bool (*has_work)(struct blk_mq_hw_ctx *); void (*completed_request)(struct request *, u64); void (*requeue_request)(struct request *); - struct request *(*former_request)(struct request_queue *, struct request *); - struct request *(*next_request)(struct request_queue *, struct request *); + struct request *(*former_request)(struct request *); + struct request *(*next_request)(struct request *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); }; @@ -90,7 +90,7 @@ struct elevator_type #define ELV_HASH_BITS 6 -void elv_rqhash_del(struct request_queue *q, struct request *rq); +void elv_rqhash_del(struct request *rq); void elv_rqhash_add(struct request_queue *q, struct request *rq); void elv_rqhash_reposition(struct request_queue *q, struct request *rq); struct request *elv_rqhash_find(struct request_queue *q, sector_t offset); @@ -140,8 +140,8 @@ extern struct elevator_queue *elevator_alloc(struct request_queue *, /* * Helper functions. */ -extern struct request *elv_rb_former_request(struct request_queue *, struct request *); -extern struct request *elv_rb_latter_request(struct request_queue *, struct request *); +extern struct request *elv_rb_former_request(struct request *); +extern struct request *elv_rb_latter_request(struct request *); /* * rb support functions.
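A note on the BFQ_BUG_ON() consistency checks that this patch adds throughout bfq-cgroup.c and bfq-wf2q.c: the macro's definition is not part of the hunks above, so the sketch below only illustrates the usual pattern for debug-only assertions of this kind, assuming a dedicated Kconfig symbol; the name CONFIG_BFQ_DEV_CHECKS is invented here, and the real definition in the bfq-dev headers may differ.

/* Illustrative sketch only -- not taken from the bfq-dev tree. */
#ifdef CONFIG_BFQ_DEV_CHECKS			/* assumed Kconfig symbol */
#define BFQ_BUG_ON(cond)	BUG_ON(cond)	/* checks enabled: oops on a violated invariant */
#else
#define BFQ_BUG_ON(cond)	do { } while (0)	/* checks compiled out; cond is not evaluated */
#endif

With a definition of this shape, the many hot-path assertions added above compile away entirely when the checks are disabled, so they add no overhead to a non-debug build.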
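The log statements added in bfq_calc_vtime_jump(), __bfq_lookup_next_entity() and bfq_get_next_queue() all print WF2Q+ timestamps through the expression ((x>>10)*1000)>>12 (written as two shifts of 10 and 2 in one spot, which is equivalent). This is arithmetic the patch already uses, not new behaviour: it is roughly x * 1000 / 2^22, with the early shift presumably there to keep the multiplication from overflowing 64 bits for large virtual times. Mainline bfq-wf2q.c scales service by WFQ_SERVICE_SHIFT (22), so the printed value is the timestamp expressed in thousandths of one unscaled service unit. The helper below exists only to make the conversion explicit; its name is invented and it is not part of the patch.

/* Illustrative helper, not part of the patch. */
static inline unsigned long long bfq_vtime_to_milliunits(unsigned long long x)
{
	/* ((x >> 10) * 1000) >> 12  ==  approximately x * 1000 / 2^22, without overflowing u64 */
	return ((x >> 10) * 1000) >> 12;
}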