diff -Nur linux-2.4.33-imedia/drivers/block/elevator.c linux-2.4.33-imedia-patching/drivers/block/elevator.c
--- linux-2.4.33-imedia/drivers/block/elevator.c	2003-06-13 17:51:32.000000000 +0300
+++ linux-2.4.33-imedia-patching/drivers/block/elevator.c	2006-01-26 15:21:38.000000000 +0200
@@ -82,7 +82,9 @@
 {
 	struct list_head *entry = &q->queue_head;
 	unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+	const int max_bomb_segments = q->elevator.max_bomb_segments;
 	struct request *__rq;
+	int passed_a_read = 0;
 	int backmerge_only = 0;
 
 	while (!backmerge_only && (entry = entry->prev) != head) {
@@ -100,6 +102,8 @@
 			continue;
 		if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head) && !backmerge_only)
 			*req = __rq;
+		if (__rq->cmd != WRITE)
+			passed_a_read = 1;
 		if (__rq->cmd != rw)
 			continue;
 		if (__rq->nr_sectors + count > max_sectors)
@@ -130,6 +134,57 @@
 		}
 	}
 
+	/*
+	 * If we failed to merge a read anywhere in the request
+	 * queue, we really don't want to place it at the end
+	 * of the list, behind lots of writes. So place it near
+	 * the front.
+	 *
+	 * We don't want to place it in front of _all_ writes: that
+	 * would create lots of seeking, and isn't tunable.
+	 * We try to avoid promoting this read in front of existing
+	 * reads.
+	 *
+	 * max_bomb_segments becomes the maximum number of write
+	 * requests which we allow to remain in place in front of
+	 * a newly introduced read. We weight things a little bit,
+	 * so large writes are more expensive than small ones, but it's
+	 * requests which count, not sectors.
+	 */
+	if (max_bomb_segments && rw == READ && !passed_a_read &&
+	    ret == ELEVATOR_NO_MERGE) {
+		int cur_latency = 0;
+		struct request * const cur_request = *req;
+
+		entry = head->next;
+		while (entry != &q->queue_head) {
+			struct request *__rq;
+
+			if (entry == &q->queue_head)
+				BUG();
+			if (entry == q->queue_head.next &&
+			    q->head_active && !q->plugged)
+				BUG();
+			__rq = blkdev_entry_to_request(entry);
+
+			if (__rq == cur_request) {
+				/*
+				 * This is where the old algorithm placed it.
+				 * There's no point pushing it further back,
+				 * so leave it here, in sorted order.
+				 */
+				break;
+			}
+			if (__rq->cmd == WRITE) {
+				cur_latency += 1 + __rq->nr_sectors / 64;
+				if (cur_latency >= max_bomb_segments) {
+					*req = __rq;
+					break;
+				}
+			}
+			entry = entry->next;
+		}
+	}
 
 	return ret;
 }
@@ -187,7 +242,7 @@
 	output.queue_ID = elevator->queue_ID;
 	output.read_latency = elevator->read_latency;
 	output.write_latency = elevator->write_latency;
-	output.max_bomb_segments = 0;
+	output.max_bomb_segments = elevator->max_bomb_segments;
 
 	if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
 		return -EFAULT;
@@ -206,9 +261,12 @@
 		return -EINVAL;
 	if (input.write_latency < 0)
 		return -EINVAL;
+	if (input.max_bomb_segments < 0)
+		return -EINVAL;
 
 	elevator->read_latency = input.read_latency;
 	elevator->write_latency = input.write_latency;
+	elevator->max_bomb_segments = input.max_bomb_segments;
 
 	return 0;
 }
diff -Nur linux-2.4.33-imedia/drivers/block/ll_rw_blk.c linux-2.4.33-imedia-patching/drivers/block/ll_rw_blk.c
--- linux-2.4.33-imedia/drivers/block/ll_rw_blk.c	2006-01-26 15:21:21.000000000 +0200
+++ linux-2.4.33-imedia-patching/drivers/block/ll_rw_blk.c	2006-01-26 15:21:38.000000000 +0200
@@ -438,27 +438,27 @@
 			break;
 		memset(rq, 0, sizeof(*rq));
 		rq->rq_status = RQ_INACTIVE;
-		list_add(&rq->queue, &q->rq.free);
-		q->rq.count++;
+		list_add(&rq->queue, &q->rq.free);
+		q->rq.count++;
 		q->nr_requests++;
 	}
 
-	/*
-	 * Wakeup waiters after both one quarter of the
-	 * max-in-fligh queue and one quarter of the requests
-	 * are available again.
-	 */
+	/*
+	 * Wakeup waiters after both one quarter of the
+	 * max-in-flight queue and one quarter of the requests
+	 * are available again.
+	 */
 	q->batch_requests = q->nr_requests / 4;
 	if (q->batch_requests > 32)
 		q->batch_requests = 32;
-	q->batch_sectors = max_queue_sectors / 4;
-
-	q->max_queue_sectors = max_queue_sectors;
-
-	BUG_ON(!q->batch_sectors);
-	atomic_set(&q->nr_sectors, 0);
+	q->batch_sectors = max_queue_sectors / 4;
+
+	q->max_queue_sectors = max_queue_sectors;
+
+	BUG_ON(!q->batch_sectors);
+	atomic_set(&q->nr_sectors, 0);
 
 	spin_unlock_irqrestore(&io_request_lock, flags);
 	return q->nr_requests;
@@ -468,27 +468,29 @@
 {
 	struct sysinfo si;
 	int megs;		/* Total memory, in megabytes */
-	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
-
-	INIT_LIST_HEAD(&q->rq.free);
+	int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
+
+	INIT_LIST_HEAD(&q->rq.free);
 	q->rq.count = 0;
 	q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
 	q->nr_requests = 0;
 
 	si_meminfo(&si);
 	megs = si.totalram >> (20 - PAGE_SHIFT);
-	nr_requests = MAX_NR_REQUESTS;
-	if (megs < 30) {
-		nr_requests /= 2;
-		max_queue_sectors /= 2;
-	}
-	/* notice early if anybody screwed the defaults */
-	BUG_ON(!nr_requests);
-	BUG_ON(!max_queue_sectors);
-
-	blk_grow_request_list(q, nr_requests, max_queue_sectors);
+	nr_requests = (megs * 2) & ~15;	/* One per half-megabyte */
+	if (megs < 30)
+		nr_requests /= 2;
+	if (nr_requests < 32)
+		nr_requests = 32;
+	if (nr_requests > 1024)
+		nr_requests = 1024;
+	/* notice early if anybody screwed the defaults */
+	BUG_ON(!nr_requests);
+	BUG_ON(!max_queue_sectors);
+
+	blk_grow_request_list(q, nr_requests, max_queue_sectors);
 
-	init_waitqueue_head(&q->wait_for_requests);
+	init_waitqueue_head(&q->wait_for_requests);
 
 	spin_lock_init(&q->queue_lock);
 }
diff -Nur linux-2.4.33-imedia/include/linux/elevator.h linux-2.4.33-imedia-patching/include/linux/elevator.h
--- linux-2.4.33-imedia/include/linux/elevator.h	2003-08-25 14:44:44.000000000 +0300
+++ linux-2.4.33-imedia-patching/include/linux/elevator.h	2006-01-26 15:21:38.000000000 +0200
@@ -1,12 +1,9 @@
 #ifndef _LINUX_ELEVATOR_H
 #define _LINUX_ELEVATOR_H
 
-typedef void (elevator_fn) (struct request *, elevator_t *,
-			    struct list_head *,
-			    struct list_head *, int);
-
-typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
-				 struct buffer_head *, int, int);
+typedef int (elevator_merge_fn)(request_queue_t *, struct request **,
+				struct list_head *, struct buffer_head *bh,
+				int rw, int max_sectors);
 
 typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
 
@@ -16,6 +13,7 @@
 {
 	int read_latency;
 	int write_latency;
+	int max_bomb_segments;
 
 	elevator_merge_fn *elevator_merge_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
@@ -23,13 +21,13 @@
 	unsigned int queue_ID;
 };
 
-int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
-void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
-void elevator_noop_merge_req(struct request *, struct request *);
-
-int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
-void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
-void elevator_linus_merge_req(struct request *, struct request *);
+elevator_merge_fn elevator_noop_merge;
+elevator_merge_cleanup_fn elevator_noop_merge_cleanup;
+elevator_merge_req_fn elevator_noop_merge_req;
+
+elevator_merge_fn elevator_linus_merge;
+elevator_merge_cleanup_fn elevator_linus_merge_cleanup;
+elevator_merge_req_fn elevator_linus_merge_req;
 
 typedef struct blkelv_ioctl_arg_s {
 	int queue_ID;
@@ -53,22 +51,6 @@
 #define ELEVATOR_FRONT_MERGE	1
 #define ELEVATOR_BACK_MERGE	2
 
-/*
- * This is used in the elevator algorithm. We don't prioritise reads
- * over writes any more --- although reads are more time-critical than
- * writes, by treating them equally we increase filesystem throughput.
- * This turns out to give better overall performance. -- sct
- */
-#define IN_ORDER(s1,s2) \
-	((((s1)->rq_dev == (s2)->rq_dev && \
-	   (s1)->sector < (s2)->sector)) || \
-	 (s1)->rq_dev < (s2)->rq_dev)
-
-#define BHRQ_IN_ORDER(bh, rq) \
-	((((bh)->b_rdev == (rq)->rq_dev && \
-	   (bh)->b_rsector < (rq)->sector)) || \
-	 (bh)->b_rdev < (rq)->rq_dev)
-
 static inline int elevator_request_latency(elevator_t * elevator, int rw)
 {
 	int latency;
@@ -86,7 +68,7 @@
 ((elevator_t) {				\
 	0,			/* read_latency */		\
 	0,			/* write_latency */		\
-				\
+	0,			/* max_bomb_segments */		\
 	elevator_noop_merge,	/* elevator_merge_fn */		\
 	elevator_noop_merge_req,	/* elevator_merge_req_fn */	\
 	})
@@ -95,7 +77,7 @@
 ((elevator_t) {				\
 	128,			/* read passovers */		\
 	512,			/* write passovers */		\
-				\
+	6,			/* max_bomb_segments */		\
 	elevator_linus_merge,	/* elevator_merge_fn */		\
 	elevator_linus_merge_req,	/* elevator_merge_req_fn */	\
 	})
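
Notes on the patch:

The elevator.c hunks add "read bomb" avoidance in the style of Andrew Morton's
2.4 read-latency work: a READ that found no merge is no longer appended behind
the entire write backlog, but is inserted after at most max_bomb_segments
worth of queued writes, with large writes weighted more heavily than small
ones, and never promoted past another read. The previously unused
max_bomb_segments field of the BLKELVGET/BLKELVSET ioctl argument carries the
tunable; ELEVATOR_LINUS defaults it to 6, ELEVATOR_NOOP leaves it at 0
(disabled).

Below is a minimal user-space sketch for inspecting and setting the tunable
through those ioctls. It is an illustration, not part of the patch: the ioctl
numbers are the stock 2.4 values from <linux/fs.h>, and the struct is a local
copy of blkelv_ioctl_arg_t, since the kernel header is not cleanly includable
from user space.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* 2.4 ioctl numbers, as defined in <linux/fs.h>. */
#ifndef BLKELVGET
#define BLKELVGET	_IO(0x12,106)	/* elevator get */
#define BLKELVSET	_IO(0x12,107)	/* elevator set */
#endif

/* Local copy of the kernel's blkelv_ioctl_arg_t layout (see the
 * elevator.h hunk above). */
typedef struct blkelv_ioctl_arg_s {
	int queue_ID;
	int read_latency;
	int write_latency;
	int max_bomb_segments;
} blkelv_ioctl_arg_t;

int main(int argc, char **argv)
{
	blkelv_ioctl_arg_t arg;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <device> <max_bomb_segments>\n",
			argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Read the current settings so only one field is changed. */
	if (ioctl(fd, BLKELVGET, &arg) < 0) {
		perror("BLKELVGET");
		return 1;
	}
	printf("queue %d: read_latency=%d write_latency=%d max_bomb_segments=%d\n",
	       arg.queue_ID, arg.read_latency, arg.write_latency,
	       arg.max_bomb_segments);

	arg.max_bomb_segments = atoi(argv[2]);	/* kernel rejects negatives */
	if (ioctl(fd, BLKELVSET, &arg) < 0) {
		perror("BLKELVSET");
		return 1;
	}
	close(fd);
	return 0;
}

The elvtune(8) utility from util-linux drives the same ioctl pair, and
versions that carry the -b option expose this field directly, so
"elvtune -b 6 /dev/hda" should be the equivalent one-liner on a patched
kernel.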