Block Device Drivers: the Scheduler in the I/O Scheduling Layer
Requests are handed to the I/O scheduling layer through generic_make_request, which ultimately calls q->make_request_fn(q, bio). That call is the entry point into the I/O scheduling layer, so the first question is where make_request_fn gets assigned.
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ;		/* maximum number of requests: 128 */

	q->make_request_fn = mfn;		/* handler for the request described by a bio */
	blk_queue_dma_alignment(q, 511);	/* tell the kernel the device's memory-alignment
						 * constraint for DMA transfers */
	blk_queue_congestion_threshold(q);	/* congestion thresholds, mainly for flow control */
	q->nr_batching = BLK_BATCH_REQ;

	blk_set_default_limits(&q->limits);	/* queue limits that constrain I/O handling, e.g. the
						 * maximum sectors allowed per request; they are visible
						 * under /sys/block/<dev>/queue/ and get default values
						 * here at initialization */

	/*
	 * by default assume old behaviour and bounce for any highmem page
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);	/* tell the kernel the highest physical address
							 * usable for DMA; BLK_BOUNCE_HIGH means use
							 * bounce buffers for any highmem page */
}
As the above shows, this function sets up the request queue's parameters: the maximum request count, DMA alignment, the queue limits, and the function that handles bio-described requests. Next we peel back the layers to find which function actually services our requests, and what algorithm is used to process the request queue.
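For orientation, this is roughly how a simple driver of this kernel era wires everything up. blk_init_queue calls blk_init_allocated_queue internally, which is where blk_queue_make_request is invoked. This is a minimal sketch, not a real driver; sbd_request and the other sbd_* names are hypothetical:

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static spinlock_t sbd_lock;			/* protects the request queue */
static struct request_queue *sbd_queue;

/* request_fn: the kernel calls this when it wants the driver to act */
static void sbd_request(struct request_queue *q)
{
	struct request *req;

	while ((req = blk_fetch_request(q)) != NULL) {
		/* ... transfer data for blk_rq_pos(req) here ... */
		__blk_end_request_all(req, 0);	/* complete the whole request, no error */
	}
}

static int __init sbd_init(void)
{
	spin_lock_init(&sbd_lock);
	/* blk_init_queue -> blk_init_allocated_queue
	 *                -> blk_queue_make_request(q, blk_queue_bio) */
	sbd_queue = blk_init_queue(sbd_request, &sbd_lock);
	if (!sbd_queue)
		return -ENOMEM;
	return 0;
}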
struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
			 spinlock_t *lock)
{
	if (!q)
		return NULL;

	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);	/* allocate the blk_flush_queue */
	if (!q->fq)
		return NULL;

	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))	/* initialize the request_list */
		goto fail;

	q->request_fn		= rfn;	/* request handler: used when the kernel wants the
					 * driver to perform some action */
	q->prep_rq_fn		= NULL;
	q->unprep_rq_fn		= NULL;
	q->queue_flags		|= QUEUE_FLAG_DEFAULT;

	/* Override internal queue lock with supplied lock pointer */
	if (lock)
		q->queue_lock	= lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	blk_queue_make_request(q, blk_queue_bio);	/* install the handler for bio-described requests */

	q->sg_reserved_size = INT_MAX;

	/* Protect q->elevator from elevator_change */
	mutex_lock(&q->sysfs_lock);

	/* init elevator */
	if (elevator_init(q, NULL)) {	/* initialize the I/O scheduling algorithm */
		mutex_unlock(&q->sysfs_lock);
		goto fail;
	}

	mutex_unlock(&q->sysfs_lock);

	return q;

fail:
	blk_free_flush_queue(q->fq);
	return NULL;
}
If the device being accessed is a block device with a request queue, the kernel calls blk_queue_bio to merge and schedule the bio.
static void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
	const bool sync = !!(bio->bi_rw & REQ_SYNC);
	struct blk_plug *plug;
	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
	struct request *req;
	unsigned int request_count = 0;

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);	/* set up a bounce buffer if the pages are
					 * unsuitable for this I/O */

	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {	/* data integrity check */
		bio_endio(bio, -EIO);
		return;
	}

	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
		spin_lock_irq(q->queue_lock);
		where = ELEVATOR_INSERT_FLUSH;
		goto get_rq;
	}

	/*
	 * Check if we can merge with the plugged list before grabbing
	 * any locks.
	 */
	if (!blk_queue_nomerges(q) &&	/* merging is not disabled on this queue */
	    blk_attempt_plug_merge(q, bio, &request_count))	/* try to merge the bio into the
								 * currently plugged list */
		return;

	spin_lock_irq(q->queue_lock);

	el_ret = elv_merge(q, &req, bio);	/* the core function: find a request the bio
						 * can front- or back-merge into */
	if (el_ret == ELEVATOR_BACK_MERGE) {	/* perform the back merge */
		if (bio_attempt_back_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {	/* perform the front merge */
		if (bio_attempt_front_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	}

	/* no existing request could absorb the bio */
get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
	rw_flags = bio_data_dir(bio);
	if (sync)
		rw_flags |= REQ_SYNC;

	/*
	 * Grab a free request. This might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request(q, rw_flags, bio, GFP_NOIO);	/* allocate an empty request */
	if (IS_ERR(req)) {
		bio_endio(bio, PTR_ERR(req));	/* @q is dead */
		goto out_unlock;
	}

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
	init_request_from_bio(req, bio);	/* initialize the request from the bio */

	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
		req->cpu = raw_smp_processor_id();

	plug = current->plug;
	if (plug) {
		/*
		 * If this is the first request added after a plug, fire
		 * off a plug trace.
		 */
		if (!request_count)
			trace_block_plug(q);
		else {
			if (request_count >= BLK_MAX_REQUEST_COUNT) {
				blk_flush_plug_list(plug, false);	/* plugged list hit its limit: unplug */
				trace_block_plug(q);
			}
		}
		list_add_tail(&req->queuelist, &plug->list);	/* add the request to the plugged list */
		blk_account_io_start(req, true);
	} else {
		spin_lock_irq(q->queue_lock);
		add_acct_request(q, req, where);
		__blk_run_queue(q);
out_unlock:
		spin_unlock_irq(q->queue_lock);
	}
}
blk_queue_bio does three main things:
1) attempt a back merge of the bio into an existing request;
2) attempt a front merge;
3) if the bio cannot be merged, allocate a new request for it and hand it to the scheduler, batching it on the per-task plug list when one is active (see the plugging sketch below).
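The plug branch in the code above is what lets a task accumulate requests before they reach the device. A hypothetical submitter would use the plugging API roughly like this; this is a sketch against the pre-blk-mq interface, and bio1/bio2 are assumed to be already-built bios:

struct blk_plug plug;

blk_start_plug(&plug);		/* current->plug now points at &plug */

/* each submit_bio ends up in blk_queue_bio; new requests collect
 * on plug.list instead of being dispatched to the device at once */
submit_bio(READ, bio1);
submit_bio(READ, bio2);

blk_finish_plug(&plug);		/* unplug: blk_flush_plug_list() drains
				 * plug.list into the elevator */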
The key function in the merge path is elv_merge. Its job is to decide whether the bio can be back-merged or front-merged into an existing request.
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * Levels of merges:
	 * 	nomerges:  No merges at all attempted
	 * 	noxmerges: Only simple one-hit cache try
	 * 	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q))	/* the queue forbids merging: report NO_MERGE */
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	/* last_merge points at the request that most recently merged successfully */
	if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
		ret = blk_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);	/* look up the request hash by the bio's
								 * start sector; a hit means the bio can be
								 * appended to that request's tail */
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	/* failing all of the above, ask the I/O scheduler's own
	 * elevator_merge_fn to find a suitable request */
	if (e->type->ops.elevator_merge_fn)
		return e->type->ops.elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}
elevator_merge_fn is specific to each I/O scheduler and involves the scheduling algorithm itself, which is left for the next chapter. Through elv_merge we learn whether the bio merges at the front or the back, and control flows into the corresponding handler. Let's look at each in turn.
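To make "scheduler-specific" concrete: each scheduler publishes its hooks in a struct elevator_type and registers it with elv_register. The sketch below is abridged from memory of deadline-iosched.c in kernels of this era, so treat the exact field list as approximate:

static struct elevator_type iosched_deadline = {
	.ops = {
		.elevator_merge_fn =		deadline_merge,		/* called from elv_merge() */
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_add_req_fn =		deadline_add_request,
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
		/* (a few hooks and sysfs attrs omitted) */
	},
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);	/* makes "deadline" selectable by elevator_init()/sysfs */
}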
1. elv_bio_merged
void elv_bio_merged(struct request_queue *q, struct request *rq,
			struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_bio_merged_fn)
		e->type->ops.elevator_bio_merged_fn(q, rq, bio);	/* scheduler hook; only the CFQ
									 * scheduler provides one */
}
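For reference, in cfq-iosched.c of this era the hook is cfq_bio_merged, which, as best I recall, does nothing beyond updating per-cgroup merge statistics (a sketch from memory; the helper names may differ slightly):

static void cfq_bio_merged(struct request_queue *q, struct request *req,
			   struct bio *bio)
{
	/* account the merged bio against the issuing cgroup's stats */
	cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw);
}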
2. elv_merged_request
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_merged_fn)
		e->type->ops.elevator_merged_fn(q, rq, type);	/* let the scheduler update its own
								 * bookkeeping for the merge */

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}
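Why rehash only on a back merge? The elevator's request hash is keyed on a request's end sector, and a back merge grows the request at its tail, changing that key; a front merge leaves the end sector untouched. Sketched from memory of block/elevator.c (exact helper names may vary slightly):

#define rq_hash_key(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))	/* end sector */

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);	/* drop the entry hashed under the old end sector... */
	elv_rqhash_add(q, rq);	/* ...and rehash under the new one */
}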
As we have seen, both merging and dispatch go through a set of algorithm callbacks. The next chapter looks at which scheduling algorithms the kernel supports and the strengths and weaknesses of each.