Embedded Freedom
• source navigation • diff markup • identifier search • freetext search •
1 /* 2 * CFQ, or complete fairness queueing, disk scheduler. 3 * 4 * Based on ideas from a previously unfinished io 5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. 6 * 7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> 8 */ 9 #include <linux/module.h> 10 #include <linux/blkdev.h> 11 #include <linux/elevator.h> 12 #include <linux/rbtree.h> 13 #include <linux/ioprio.h> 14 #include <linux/blktrace_api.h> 15 16 /* 17 * tunables 18 */ 19 /* max queue in one round of service */ 20 static const int cfq_quantum = 4; 21 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 22 /* maximum backwards seek, in KiB */ 23 static const int cfq_back_max = 16 * 1024; 24 /* penalty of a backwards seek */ 25 static const int cfq_back_penalty = 2; 26 static const int cfq_slice_sync = HZ / 10; 27 static int cfq_slice_async = HZ / 25; 28 static const int cfq_slice_async_rq = 2; 29 static int cfq_slice_idle = HZ / 125; 30 31 /* 32 * offset from end of service tree 33 */ 34 #define CFQ_IDLE_DELAY (HZ / 5) 35 36 /* 37 * below this threshold, we consider thinktime immediate 38 */ 39 #define CFQ_MIN_TT (2) 40 41 #define CFQ_SLICE_SCALE (5) 42 #define CFQ_HW_QUEUE_MIN (5) 43 44 #define RQ_CIC(rq) \ 45 ((struct cfq_io_context *) (rq)->elevator_private) 46 #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) 47 48 static struct kmem_cache *cfq_pool; 49 static struct kmem_cache *cfq_ioc_pool; 50 51 static DEFINE_PER_CPU(unsigned long, ioc_count); 52 static struct completion *ioc_gone; 53 static DEFINE_SPINLOCK(ioc_gone_lock); 54 55 #define CFQ_PRIO_LISTS IOPRIO_BE_NR 56 #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 57 #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 58 59 #define ASYNC (0) 60 #define SYNC (1) 61 62 #define sample_valid(samples) ((samples) > 80) 63 64 /* 65 * Most of our rbtree usage is for sorting with min extraction, so 66 * if we cache the leftmost node we don't have to walk down the tree 67 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should 68 * move this into the elevator for the rq sorting as well. 69 */ 70 struct cfq_rb_root { 71 struct rb_root rb; 72 struct rb_node *left; 73 }; 74 #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, } 75 76 /* 77 * Per block device queue structure 78 */ 79 struct cfq_data { 80 struct request_queue *queue; 81 82 /* 83 * rr list of queues with requests and the count of them 84 */ 85 struct cfq_rb_root service_tree; 86 unsigned int busy_queues; 87 88 int rq_in_driver; 89 int sync_flight; 90 91 /* 92 * queue-depth detection 93 */ 94 int rq_queued; 95 int hw_tag; 96 int hw_tag_samples; 97 int rq_in_driver_peak; 98 99 /* 100 * idle window management 101 */ 102 struct timer_list idle_slice_timer; 103 struct work_struct unplug_work; 104 105 struct cfq_queue *active_queue; 106 struct cfq_io_context *active_cic; 107 108 /* 109 * async queue for each priority case 110 */ 111 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; 112 struct cfq_queue *async_idle_cfqq; 113 114 sector_t last_position; 115 unsigned long last_end_request; 116 117 /* 118 * tunables, see top of file 119 */ 120 unsigned int cfq_quantum; 121 unsigned int cfq_fifo_expire[2]; 122 unsigned int cfq_back_penalty; 123 unsigned int cfq_back_max; 124 unsigned int cfq_slice[2]; 125 unsigned int cfq_slice_async_rq; 126 unsigned int cfq_slice_idle; 127 128 struct list_head cic_list; 129 }; 130 131 /* 132 * Per process-grouping structure 133 */ 134 struct cfq_queue { 135 /* reference count */ 136 atomic_t ref; 137 /* various state flags, see below */ 138 unsigned int flags; 139 /* parent cfq_data */ 140 struct cfq_data *cfqd; 141 /* service_tree member */ 142 struct rb_node rb_node; 143 /* service_tree key */ 144 unsigned long rb_key; 145 /* sorted list of pending requests */ 146 struct rb_root sort_list; 147 /* if fifo isn't expired, next request to serve */ 148 struct request *next_rq; 149 /* requests queued in sort_list */ 150 int queued[2]; 151 /* currently allocated requests */ 152 int allocated[2]; 153 /* fifo list of requests in sort_list */ 154 struct list_head fifo; 155 156 unsigned long slice_end; 157 long slice_resid; 158 159 /* pending metadata requests */ 160 int meta_pending; 161 /* number of requests that are on the dispatch list or inside driver */ 162 int dispatched; 163 164 /* io prio of this group */ 165 unsigned short ioprio, org_ioprio; 166 unsigned short ioprio_class, org_ioprio_class; 167 168 pid_t pid; 169 }; 170 171 enum cfqq_state_flags { 172 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 173 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 174 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ 175 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 176 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */ 177 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 178 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 179 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 180 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */ 181 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 182 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 183 }; 184 185 #define CFQ_CFQQ_FNS(name) \ 186 static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \ 187 { \ 188 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \ 189 } \ 190 static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \ 191 { \ 192 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \ 193 } \ 194 static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ 195 { \ 196 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \ 197 } 198 199 CFQ_CFQQ_FNS(on_rr); 200 CFQ_CFQQ_FNS(wait_request); 201 CFQ_CFQQ_FNS(must_alloc); 202 CFQ_CFQQ_FNS(must_alloc_slice); 203 CFQ_CFQQ_FNS(must_dispatch); 204 CFQ_CFQQ_FNS(fifo_expire); 205 CFQ_CFQQ_FNS(idle_window); 206 CFQ_CFQQ_FNS(prio_changed); 207 CFQ_CFQQ_FNS(queue_new); 208 CFQ_CFQQ_FNS(slice_new); 209 CFQ_CFQQ_FNS(sync); 210 #undef CFQ_CFQQ_FNS 211 212 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 213 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) 214 #define cfq_log(cfqd, fmt, args...) \ 215 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) 216 217 static void cfq_dispatch_insert(struct request_queue *, struct request *); 218 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, 219 struct io_context *, gfp_t); 220 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, 221 struct io_context *); 222 223 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, 224 int is_sync) 225 { 226 return cic->cfqq[!!is_sync]; 227 } 228 229 static inline void cic_set_cfqq(struct cfq_io_context *cic, 230 struct cfq_queue *cfqq, int is_sync) 231 { 232 cic->cfqq[!!is_sync] = cfqq; 233 } 234 235 /* 236 * We regard a request as SYNC, if it's either a read or has the SYNC bit 237 * set (in which case it could also be direct WRITE). 238 */ 239 static inline int cfq_bio_sync(struct bio *bio) 240 { 241 if (bio_data_dir(bio) == READ || bio_sync(bio)) 242 return 1; 243 244 return 0; 245 } 246 247 /* 248 * scheduler run of queue, if there are requests pending and no one in the 249 * driver that will restart queueing 250 */ 251 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 252 { 253 if (cfqd->busy_queues) { 254 cfq_log(cfqd, "schedule dispatch"); 255 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); 256 } 257 } 258 259 static int cfq_queue_empty(struct request_queue *q) 260 { 261 struct cfq_data *cfqd = q->elevator->elevator_data; 262 263 return !cfqd->busy_queues; 264 } 265 266 /* 267 * Scale schedule slice based on io priority. Use the sync time slice only 268 * if a queue is marked sync and has sync io queued. A sync queue with async 269 * io only, should not get full sync slice length. 270 */ 271 static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync, 272 unsigned short prio) 273 { 274 const int base_slice = cfqd->cfq_slice[sync]; 275 276 WARN_ON(prio >= IOPRIO_BE_NR); 277 278 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 279 } 280 281 static inline int 282 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 283 { 284 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 285 } 286 287 static inline void 288 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 289 { 290 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 291 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 292 } 293 294 /* 295 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end 296 * isn't valid until the first request from the dispatch is activated 297 * and the slice time set. 298 */ 299 static inline int cfq_slice_used(struct cfq_queue *cfqq) 300 { 301 if (cfq_cfqq_slice_new(cfqq)) 302 return 0; 303 if (time_before(jiffies, cfqq->slice_end)) 304 return 0; 305 306 return 1; 307 } 308 309 /* 310 * Lifted from AS - choose which of rq1 and rq2 that is best served now. 311 * We choose the request that is closest to the head right now. Distance 312 * behind the head is penalized and only allowed to a certain extent. 313 */ 314 static struct request * 315 cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) 316 { 317 sector_t last, s1, s2, d1 = 0, d2 = 0; 318 unsigned long back_max; 319 #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ 320 #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ 321 unsigned wrap = 0; /* bit mask: requests behind the disk head? */ 322 323 if (rq1 == NULL || rq1 == rq2) 324 return rq2; 325 if (rq2 == NULL) 326 return rq1; 327 328 if (rq_is_sync(rq1) && !rq_is_sync(rq2)) 329 return rq1; 330 else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) 331 return rq2; 332 if (rq_is_meta(rq1) && !rq_is_meta(rq2)) 333 return rq1; 334 else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) 335 return rq2; 336 337 s1 = rq1->sector; 338 s2 = rq2->sector; 339 340 last = cfqd->last_position; 341 342 /* 343 * by definition, 1KiB is 2 sectors 344 */ 345 back_max = cfqd->cfq_back_max * 2; 346 347 /* 348 * Strict one way elevator _except_ in the case where we allow 349 * short backward seeks which are biased as twice the cost of a 350 * similar forward seek. 351 */ 352 if (s1 >= last) 353 d1 = s1 - last; 354 else if (s1 + back_max >= last) 355 d1 = (last - s1) * cfqd->cfq_back_penalty; 356 else 357 wrap |= CFQ_RQ1_WRAP; 358 359 if (s2 >= last) 360 d2 = s2 - last; 361 else if (s2 + back_max >= last) 362 d2 = (last - s2) * cfqd->cfq_back_penalty; 363 else 364 wrap |= CFQ_RQ2_WRAP; 365 366 /* Found required data */ 367 368 /* 369 * By doing switch() on the bit mask "wrap" we avoid having to 370 * check two variables for all permutations: --> faster! 371 */ 372 switch (wrap) { 373 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ 374 if (d1 < d2) 375 return rq1; 376 else if (d2 < d1) 377 return rq2; 378 else { 379 if (s1 >= s2) 380 return rq1; 381 else 382 return rq2; 383 } 384 385 case CFQ_RQ2_WRAP: 386 return rq1; 387 case CFQ_RQ1_WRAP: 388 return rq2; 389 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ 390 default: 391 /* 392 * Since both rqs are wrapped, 393 * start with the one that's further behind head 394 * (--> only *one* back seek required), 395 * since back seek takes more time than forward. 396 */ 397 if (s1 <= s2) 398 return rq1; 399 else 400 return rq2; 401 } 402 } 403 404 /* 405 * The below is leftmost cache rbtree addon 406 */ 407 static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) 408 { 409 if (!root->left) 410 root->left = rb_first(&root->rb); 411 412 if (root->left) 413 return rb_entry(root->left, struct cfq_queue, rb_node); 414 415 return NULL; 416 } 417 418 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 419 { 420 if (root->left == n) 421 root->left = NULL; 422 423 rb_erase(n, &root->rb); 424 RB_CLEAR_NODE(n); 425 } 426 427 /* 428 * would be nice to take fifo expire time into account as well 429 */ 430 static struct request * 431 cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, 432 struct request *last) 433 { 434 struct rb_node *rbnext = rb_next(&last->rb_node); 435 struct rb_node *rbprev = rb_prev(&last->rb_node); 436 struct request *next = NULL, *prev = NULL; 437 438 BUG_ON(RB_EMPTY_NODE(&last->rb_node)); 439 440 if (rbprev) 441 prev = rb_entry_rq(rbprev); 442 443 if (rbnext) 444 next = rb_entry_rq(rbnext); 445 else { 446 rbnext = rb_first(&cfqq->sort_list); 447 if (rbnext && rbnext != &last->rb_node) 448 next = rb_entry_rq(rbnext); 449 } 450 451 return cfq_choose_req(cfqd, next, prev); 452 } 453 454 static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 455 struct cfq_queue *cfqq) 456 { 457 /* 458 * just an approximation, should be ok. 459 */ 460 return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) - 461 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); 462 } 463 464 /* 465 * The cfqd->service_tree holds all pending cfq_queue's that have 466 * requests waiting to be processed. It is sorted in the order that 467 * we will service the queues. 468 */ 469 static void cfq_service_tree_add(struct cfq_data *cfqd, 470 struct cfq_queue *cfqq, int add_front) 471 { 472 struct rb_node **p, *parent; 473 struct cfq_queue *__cfqq; 474 unsigned long rb_key; 475 int left; 476 477 if (cfq_class_idle(cfqq)) { 478 rb_key = CFQ_IDLE_DELAY; 479 parent = rb_last(&cfqd->service_tree.rb); 480 if (parent && parent != &cfqq->rb_node) { 481 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 482 rb_key += __cfqq->rb_key; 483 } else 484 rb_key += jiffies; 485 } else if (!add_front) { 486 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 487 rb_key += cfqq->slice_resid; 488 cfqq->slice_resid = 0; 489 } else 490 rb_key = 0; 491 492 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 493 /* 494 * same position, nothing more to do 495 */ 496 if (rb_key == cfqq->rb_key) 497 return; 498 499 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 500 } 501 502 left = 1; 503 parent = NULL; 504 p = &cfqd->service_tree.rb.rb_node; 505 while (*p) { 506 struct rb_node **n; 507 508 parent = *p; 509 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 510 511 /* 512 * sort RT queues first, we always want to give 513 * preference to them. IDLE queues goes to the back. 514 * after that, sort on the next service time. 515 */ 516 if (cfq_class_rt(cfqq) > cfq_class_rt(__cfqq)) 517 n = &(*p)->rb_left; 518 else if (cfq_class_rt(cfqq) < cfq_class_rt(__cfqq)) 519 n = &(*p)->rb_right; 520 else if (cfq_class_idle(cfqq) < cfq_class_idle(__cfqq)) 521 n = &(*p)->rb_left; 522 else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) 523 n = &(*p)->rb_right; 524 else if (rb_key < __cfqq->rb_key) 525 n = &(*p)->rb_left; 526 else 527 n = &(*p)->rb_right; 528 529 if (n == &(*p)->rb_right) 530 left = 0; 531 532 p = n; 533 } 534 535 if (left) 536 cfqd->service_tree.left = &cfqq->rb_node; 537 538 cfqq->rb_key = rb_key; 539 rb_link_node(&cfqq->rb_node, parent, p); 540 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); 541 } 542 543 /* 544 * Update cfqq's position in the service tree. 545 */ 546 static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) 547 { 548 /* 549 * Resorting requires the cfqq to be on the RR list already. 550 */ 551 if (cfq_cfqq_on_rr(cfqq)) 552 cfq_service_tree_add(cfqd, cfqq, 0); 553 } 554 555 /* 556 * add to busy list of queues for service, trying to be fair in ordering 557 * the pending list according to last request service 558 */ 559 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 560 { 561 cfq_log_cfqq(cfqd, cfqq, "add_to_rr"); 562 BUG_ON(cfq_cfqq_on_rr(cfqq)); 563 cfq_mark_cfqq_on_rr(cfqq); 564 cfqd->busy_queues++; 565 566 cfq_resort_rr_list(cfqd, cfqq); 567 } 568 569 /* 570 * Called when the cfqq no longer has requests pending, remove it from 571 * the service tree. 572 */ 573 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 574 { 575 cfq_log_cfqq(cfqd, cfqq, "del_from_rr"); 576 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 577 cfq_clear_cfqq_on_rr(cfqq); 578 579 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 580 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 581 582 BUG_ON(!cfqd->busy_queues); 583 cfqd->busy_queues--; 584 } 585 586 /* 587 * rb tree support functions 588 */ 589 static void cfq_del_rq_rb(struct request *rq) 590 { 591 struct cfq_queue *cfqq = RQ_CFQQ(rq); 592 struct cfq_data *cfqd = cfqq->cfqd; 593 const int sync = rq_is_sync(rq); 594 595 BUG_ON(!cfqq->queued[sync]); 596 cfqq->queued[sync]--; 597 598 elv_rb_del(&cfqq->sort_list, rq); 599 600 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 601 cfq_del_cfqq_rr(cfqd, cfqq); 602 } 603 604 static void cfq_add_rq_rb(struct request *rq) 605 { 606 struct cfq_queue *cfqq = RQ_CFQQ(rq); 607 struct cfq_data *cfqd = cfqq->cfqd; 608 struct request *__alias; 609 610 cfqq->queued[rq_is_sync(rq)]++; 611 612 /* 613 * looks a little odd, but the first insert might return an alias. 614 * if that happens, put the alias on the dispatch list 615 */ 616 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) 617 cfq_dispatch_insert(cfqd->queue, __alias); 618 619 if (!cfq_cfqq_on_rr(cfqq)) 620 cfq_add_cfqq_rr(cfqd, cfqq); 621 622 /* 623 * check if this request is a better next-serve candidate 624 */ 625 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); 626 BUG_ON(!cfqq->next_rq); 627 } 628 629 static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) 630 { 631 elv_rb_del(&cfqq->sort_list, rq); 632 cfqq->queued[rq_is_sync(rq)]--; 633 cfq_add_rq_rb(rq); 634 } 635 636 static struct request * 637 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) 638 { 639 struct task_struct *tsk = current; 640 struct cfq_io_context *cic; 641 struct cfq_queue *cfqq; 642 643 cic = cfq_cic_lookup(cfqd, tsk->io_context); 644 if (!cic) 645 return NULL; 646 647 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 648 if (cfqq) { 649 sector_t sector = bio->bi_sector + bio_sectors(bio); 650 651 return elv_rb_find(&cfqq->sort_list, sector); 652 } 653 654 return NULL; 655 } 656 657 static void cfq_activate_request(struct request_queue *q, struct request *rq) 658 { 659 struct cfq_data *cfqd = q->elevator->elevator_data; 660 661 cfqd->rq_in_driver++; 662 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 663 cfqd->rq_in_driver); 664 665 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; 666 } 667 668 static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 669 { 670 struct cfq_data *cfqd = q->elevator->elevator_data; 671 672 WARN_ON(!cfqd->rq_in_driver); 673 cfqd->rq_in_driver--; 674 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", 675 cfqd->rq_in_driver); 676 } 677 678 static void cfq_remove_request(struct request *rq) 679 { 680 struct cfq_queue *cfqq = RQ_CFQQ(rq); 681 682 if (cfqq->next_rq == rq) 683 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); 684 685 list_del_init(&rq->queuelist); 686 cfq_del_rq_rb(rq); 687 688 cfqq->cfqd->rq_queued--; 689 if (rq_is_meta(rq)) { 690 WARN_ON(!cfqq->meta_pending); 691 cfqq->meta_pending--; 692 } 693 } 694 695 static int cfq_merge(struct request_queue *q, struct request **req, 696 struct bio *bio) 697 { 698 struct cfq_data *cfqd = q->elevator->elevator_data; 699 struct request *__rq; 700 701 __rq = cfq_find_rq_fmerge(cfqd, bio); 702 if (__rq && elv_rq_merge_ok(__rq, bio)) { 703 *req = __rq; 704 return ELEVATOR_FRONT_MERGE; 705 } 706 707 return ELEVATOR_NO_MERGE; 708 } 709 710 static void cfq_merged_request(struct request_queue *q, struct request *req, 711 int type) 712 { 713 if (type == ELEVATOR_FRONT_MERGE) { 714 struct cfq_queue *cfqq = RQ_CFQQ(req); 715 716 cfq_reposition_rq_rb(cfqq, req); 717 } 718 } 719 720 static void 721 cfq_merged_requests(struct request_queue *q, struct request *rq, 722 struct request *next) 723 { 724 /* 725 * reposition in fifo if next is older than rq 726 */ 727 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 728 time_before(next->start_time, rq->start_time)) 729 list_move(&rq->queuelist, &next->queuelist); 730 731 cfq_remove_request(next); 732 } 733 734 static int cfq_allow_merge(struct request_queue *q, struct request *rq, 735 struct bio *bio) 736 { 737 struct cfq_data *cfqd = q->elevator->elevator_data; 738 struct cfq_io_context *cic; 739 struct cfq_queue *cfqq; 740 741 /* 742 * Disallow merge of a sync bio into an async request. 743 */ 744 if (cfq_bio_sync(bio) && !rq_is_sync(rq)) 745 return 0; 746 747 /* 748 * Lookup the cfqq that this bio will be queued with. Allow 749 * merge only if rq is queued there. 750 */ 751 cic = cfq_cic_lookup(cfqd, current->io_context); 752 if (!cic) 753 return 0; 754 755 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 756 if (cfqq == RQ_CFQQ(rq)) 757 return 1; 758 759 return 0; 760 } 761 762 static void __cfq_set_active_queue(struct cfq_data *cfqd, 763 struct cfq_queue *cfqq) 764 { 765 if (cfqq) { 766 cfq_log_cfqq(cfqd, cfqq, "set_active"); 767 cfqq->slice_end = 0; 768 cfq_clear_cfqq_must_alloc_slice(cfqq); 769 cfq_clear_cfqq_fifo_expire(cfqq); 770 cfq_mark_cfqq_slice_new(cfqq); 771 cfq_clear_cfqq_queue_new(cfqq); 772 } 773 774 cfqd->active_queue = cfqq; 775 } 776 777 /* 778 * current cfqq expired its slice (or was too idle), select new one 779 */ 780 static void 781 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 782 int timed_out) 783 { 784 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); 785 786 if (cfq_cfqq_wait_request(cfqq)) 787 del_timer(&cfqd->idle_slice_timer); 788 789 cfq_clear_cfqq_must_dispatch(cfqq); 790 cfq_clear_cfqq_wait_request(cfqq); 791 792 /* 793 * store what was left of this slice, if the queue idled/timed out 794 */ 795 if (timed_out && !cfq_cfqq_slice_new(cfqq)) { 796 cfqq->slice_resid = cfqq->slice_end - jiffies; 797 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 798 } 799 800 cfq_resort_rr_list(cfqd, cfqq); 801 802 if (cfqq == cfqd->active_queue) 803 cfqd->active_queue = NULL; 804 805 if (cfqd->active_cic) { 806 put_io_context(cfqd->active_cic->ioc); 807 cfqd->active_cic = NULL; 808 } 809 } 810 811 static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) 812 { 813 struct cfq_queue *cfqq = cfqd->active_queue; 814 815 if (cfqq) 816 __cfq_slice_expired(cfqd, cfqq, timed_out); 817 } 818 819 /* 820 * Get next queue for service. Unless we have a queue preemption, 821 * we'll simply select the first cfqq in the service tree. 822 */ 823 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) 824 { 825 if (RB_EMPTY_ROOT(&cfqd->service_tree.rb)) 826 return NULL; 827 828 return cfq_rb_first(&cfqd->service_tree); 829 } 830 831 /* 832 * Get and set a new active queue for service. 833 */ 834 static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 835 { 836 struct cfq_queue *cfqq; 837 838 cfqq = cfq_get_next_queue(cfqd); 839 __cfq_set_active_queue(cfqd, cfqq); 840 return cfqq; 841 } 842 843 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, 844 struct request *rq) 845 { 846 if (rq->sector >= cfqd->last_position) 847 return rq->sector - cfqd->last_position; 848 else 849 return cfqd->last_position - rq->sector; 850 } 851 852 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) 853 { 854 struct cfq_io_context *cic = cfqd->active_cic; 855 856 if (!sample_valid(cic->seek_samples)) 857 return 0; 858 859 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; 860 } 861 862 static int cfq_close_cooperator(struct cfq_data *cfq_data, 863 struct cfq_queue *cfqq) 864 { 865 /* 866 * We should notice if some of the queues are cooperating, eg 867 * working closely on the same area of the disk. In that case, 868 * we can group them together and don't waste time idling. 869 */ 870 return 0; 871 } 872 873 #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) 874 875 static void cfq_arm_slice_timer(struct cfq_data *cfqd) 876 { 877 struct cfq_queue *cfqq = cfqd->active_queue; 878 struct cfq_io_context *cic; 879 unsigned long sl; 880 881 /* 882 * SSD device without seek penalty, disable idling. But only do so 883 * for devices that support queuing, otherwise we still have a problem 884 * with sync vs async workloads. 885 */ 886 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) 887 return; 888 889 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 890 WARN_ON(cfq_cfqq_slice_new(cfqq)); 891 892 /* 893 * idle is disabled, either manually or by past process history 894 */ 895 if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq)) 896 return; 897 898 /* 899 * still requests with the driver, don't idle 900 */ 901 if (cfqd->rq_in_driver) 902 return; 903 904 /* 905 * task has exited, don't wait 906 */ 907 cic = cfqd->active_cic; 908 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 909 return; 910 911 /* 912 * See if this prio level has a good candidate 913 */ 914 if (cfq_close_cooperator(cfqd, cfqq) && 915 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) 916 return; 917 918 cfq_mark_cfqq_must_dispatch(cfqq); 919 cfq_mark_cfqq_wait_request(cfqq); 920 921 /* 922 * we don't want to idle for seeks, but we do want to allow 923 * fair distribution of slice time for a process doing back-to-back 924 * seeks. so allow a little bit of time for him to submit a new rq 925 */ 926 sl = cfqd->cfq_slice_idle; 927 if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) 928 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); 929 930 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 931 cfq_log(cfqd, "arm_idle: %lu", sl); 932 } 933 934 /* 935 * Move request from internal lists to the request queue dispatch list. 936 */ 937 static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) 938 { 939 struct cfq_data *cfqd = q->elevator->elevator_data; 940 struct cfq_queue *cfqq = RQ_CFQQ(rq); 941 942 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); 943 944 cfq_remove_request(rq); 945 cfqq->dispatched++; 946 elv_dispatch_sort(q, rq); 947 948 if (cfq_cfqq_sync(cfqq)) 949 cfqd->sync_flight++; 950 } 951 952 /* 953 * return expired entry, or NULL to just start from scratch in rbtree 954 */ 955 static struct request *cfq_check_fifo(struct cfq_queue *cfqq) 956 { 957 struct cfq_data *cfqd = cfqq->cfqd; 958 struct request *rq; 959 int fifo; 960 961 if (cfq_cfqq_fifo_expire(cfqq)) 962 return NULL; 963 964 cfq_mark_cfqq_fifo_expire(cfqq); 965 966 if (list_empty(&cfqq->fifo)) 967 return NULL; 968 969 fifo = cfq_cfqq_sync(cfqq); 970 rq = rq_entry_fifo(cfqq->fifo.next); 971 972 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) 973 rq = NULL; 974 975 cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq); 976 return rq; 977 } 978 979 static inline int 980 cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) 981 { 982 const int base_rq = cfqd->cfq_slice_async_rq; 983 984 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); 985 986 return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); 987 } 988 989 /* 990 * Select a queue for service. If we have a current active queue, 991 * check whether to continue servicing it, or retrieve and set a new one. 992 */ 993 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 994 { 995 struct cfq_queue *cfqq; 996 997 cfqq = cfqd->active_queue; 998 if (!cfqq) 999 goto new_queue; 1000 1001 /* 1002 * The active queue has run out of time, expire it and select new. 1003 */ 1004 if (cfq_slice_used(cfqq)) 1005 goto expire; 1006 1007 /* 1008 * The active queue has requests and isn't expired, allow it to 1009 * dispatch. 1010 */ 1011 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 1012 goto keep_queue; 1013 1014 /* 1015 * No requests pending. If the active queue still has requests in 1016 * flight or is idling for a new request, allow either of these 1017 * conditions to happen (or time out) before selecting a new queue. 1018 */ 1019 if (timer_pending(&cfqd->idle_slice_timer) || 1020 (cfqq->dispatched && cfq_cfqq_idle_window(cfqq))) { 1021 cfqq = NULL; 1022 goto keep_queue; 1023 } 1024 1025 expire: 1026 cfq_slice_expired(cfqd, 0); 1027 new_queue: 1028 cfqq = cfq_set_active_queue(cfqd); 1029 keep_queue: 1030 return cfqq; 1031 } 1032 1033 /* 1034 * Dispatch some requests from cfqq, moving them to the request queue 1035 * dispatch list. 1036 */ 1037 static int 1038 __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1039 int max_dispatch) 1040 { 1041 int dispatched = 0; 1042 1043 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); 1044 1045 do { 1046 struct request *rq; 1047 1048 /* 1049 * follow expired path, else get first next available 1050 */ 1051 rq = cfq_check_fifo(cfqq); 1052 if (rq == NULL) 1053 rq = cfqq->next_rq; 1054 1055 /* 1056 * finally, insert request into driver dispatch list 1057 */ 1058 cfq_dispatch_insert(cfqd->queue, rq); 1059 1060 dispatched++; 1061 1062 if (!cfqd->active_cic) { 1063 atomic_inc(&RQ_CIC(rq)->ioc->refcount); 1064 cfqd->active_cic = RQ_CIC(rq); 1065 } 1066 1067 if (RB_EMPTY_ROOT(&cfqq->sort_list)) 1068 break; 1069 1070 } while (dispatched < max_dispatch); 1071 1072 /* 1073 * expire an async queue immediately if it has used up its slice. idle 1074 * queue always expire after 1 dispatch round. 1075 */ 1076 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 1077 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) || 1078 cfq_class_idle(cfqq))) { 1079 cfqq->slice_end = jiffies + 1; 1080 cfq_slice_expired(cfqd, 0); 1081 } 1082 1083 return dispatched; 1084 } 1085 1086 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 1087 { 1088 int dispatched = 0; 1089 1090 while (cfqq->next_rq) { 1091 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); 10