Upload
aman
View
33
Download
10
Embed Size (px)
DESCRIPTION
제 50 강 : Block IO Layer. Ch 13 Block I/O Layer. Vectored I/O (scatter-read, gather-write). P1. text. different permission (R W X ..) | different page. text. P2. a.out. data. data. lib. P3. lib. Reminder – Lion’s code. bread(buf) 4754 call bdevsw[].d_strategy(rbp) 4763 - PowerPoint PPT Presentation
Citation preview
1
Ch 13 Block I/O Layer
제 50 강 : Block IO Layer
2
Vectored I/O(scatter-read, gather-write)
data
lib
a.out
text
different permission(R W X ..)
|different
page
P1
P2
P3
data
lib
text
3
Reminder – Lions’ code
• bread(buf) 4754
• call bdevsw[].d_strategy(rbp) 4763
• type bdevsw[] 4617
• init bdevsw[] 4658 &rkstrategy()
• rkstrategy() 5389 Removable Disk
• 5410 insert buf at front (actf)
• 5412 insert buf at last (actl)
• 5415 if disk is idle, …
• ------------------------------------
• invoke bread() N times if vectored I/O?
• Disk head scheduling
4
lib
text
datatext
data
lib
5
6221 readi(aip)6222 struct inode *aip;6224 { int *bp;6225 int lbn, bn, on;6226 register dn, n;6227 register struct inode *ip;6229 ip = aip;6230 if(u.u_count == 0) return;6232 ip->i_flag =| IACC; /* inode = “accessed” */6233 if((ip->i_mode&IFMT) == IFCHR) { /* character file? */6234 (*cdevsw[ip->i_addr[0].d_major].d_read)(ip->i_addr[0]);6235 return;6235 }6238 do { lbn = bn = lshift(u.u_offset, -9);6240 on = u.u_offset[1] & 0777;6241 n = min(512-on, u.u_count);6242 if((ip->i_mode&IFMT) != IFBLK) {6243 dn = dpcmp(ip->i_size0&0377, ip->i_size1,6244 u.u_offset[0], u.u_offset[1]);6245 if(dn <= 0) return;6247 n = min(n, dn);6248 if ((bn = bmap(ip, lbn)) == 0) return;6250 dn = ip->i_dev;6251 } else {6252 dn = ip->i_addr[0];6253 rablock = bn+1;6254 }6255 if (ip->i_lastr+1 == lbn)6256 bp = breada(dn, bn, rablock); 6257 else6258 bp = bread(dn, bn);6259 ip->i_lastr = lbn;6260 iomove(bp, on, n, B_READ);6261 brelse(bp);6262 } while(u.u_error==0 && u.u_count!=0);6263 }
readi() 6221
Offset lbn bn
7
rkstrategy()5389 rkstrategy(abp)5390 struct buf *abp;5391 {5392 register struct buf *bp;5393 register *qc, *ql;5394 int d;53955396 bp = abp;5397 if(bp->b_flags&B_PHYS)5398 mapalloc(bp);5399 d = bp->b_dev.d_minor-7;5400 if(d <= 0)5401 d = 1;5402 if (bp->b_blkno >= NRKBLK*d) {5403 bp->b_flags =| B_ERROR;5404 iodone(bp);5405 return;5406 }5407 bp->av_forw = 0;5408 spl5();5409 if (rktab.d_actf==0) 5410 rktab.d_actf = bp; /5411 else5412 rktab.d_actl->av_forw = bp; 5413 rktab.d_actl = bp;5414 if (rktab.d_active==0)5415 rkstart();5416 spl0(); 5417 }
rktab: (5386)
d_actf d_actl
lib
text
datatext
data
lib
8
4520: struct buf4521: {4522: int b_flags; 4523: struct buf *b_forw; 4524: struct buf *b_back; 4525: struct buf *av_forw; 4526: struct buf *av_back; 4527: int b_dev; 4528: int b_wcount; 4529: char *b_addr; (low) 4530: char *b_xmem; (high) 4531: char *b_blkno; 4532: char b_error; 4533: char *b_resid; 4534: 4535: } buf[NBUF];
Lions’ code – buffer_header
low address
b_dev
b_blkno
Memory
buffer
sector
high address
lib
text
datatext
data
lib
9
Problems with Lions’ code Buffer Head
• [Time]
– For vectored I/O, call N times.
• [Space]
– N buffer headers – one for each segment
– Much information – replicated
– Size of buffer head (metadata) was large – whereas size of buffer was small
lib
text
datatext
data
lib
10
The bio structure
• represents N block I/O operations
• consists of a list of N segments
• each segment may be in a separate location in memory
• Separate Roles:
– “struct bio” (new)
• represents N I/O operations
– “buffer head”
• provides mapping
• memory buffer vs disk sector
Page 2Offset length
Page 1Offset Length
Page 0Offset Length
biomemory
page
text
data
lib
text
data
lib
11
The bio structure
struct bio
bi_io_vec
bi_idx
list of bio_vec
structures
struct bio_vec {struct page *bv_page;unsigned int bv_len;unsigned int bv_offset;
};
page structures involved in block I/O operation
Page 2Offset length
Page 1Offset Length
Page 0Offset Length
biomemory
page
texttext
data data
lib
lib
adjacent in disk
separate in memory
bio_vec
bio_vec
bio_vec
12
struct bio { sector_t bi_sector; struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; unsigned long bi_flags; /* status, command, etc */ unsigned short bi_vcnt; /* how many bio_vec's */ unsigned short bi_idx; /* current index into bvl_vec */
unsigned short bi_hw_segments; unsigned int bi_size; /* residual I/O count */ unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; atomic_t bi_cnt; /* pin count */ void *bi_private; bio_destructor_t *bi_destructor; /* destructor */}
struct biobio_vec
bio_vec
list of bio_vec structures
struct bio_vec {struct page *bv_page;unsigned int bv_len;unsigned int bv_offset;
};
bio_vec
13
Individual I/O Request
ppta.out
data
lib
text
data
lib
text
datadata
datafile
14
struct request { …… struct bio *cbio; /* next bio to submit
*/ struct bio *bio; /* next unfinished bio to
complete */ struct bio *biotail; void *elevator_private; int rq_status; struct gendisk *rq_disk; int errors; unsigned long start_time; ………..
unsigned short nr_phys_segments; int tag;
char *buffer; int ref_count; request_queue_t *q; ……}
I/O request i
bio1
bio2
bio_vec(text)
bio_vec(data)
bio_vec(…)
bio_vec(text)
bio_vec(data)
bio_vec(…)
bio3
Individual I/O Request
15
Request Queue per Device
• pending block I/O requests on a block device
• list of struct request
• each request -- may contain more than one bio structure
• control & status information
• struct request_queue in <linux/blkdev.h>
I/O request 1
I/O request 3
I/O request 2
bio
bio
bio_vec
bio_vec
bio_vec
Diskrequest queue
bio_vec
bio_vec
bio_vec
16
I/O Scheduler
• The Linus Elevator
– If a request to an adjacent on-disk sector is in the queue, the two adjacent requests are merged into a single request
– Insert the new request into the queue at a spot near other requests operating on physically near sectors
– If a request in the queue is sufficiently old, the new request is inserted at the tail (to prevent starvation)
17
2 types of device
• Block Device
– Random access – fixed-size chunks of data
– navigate back & forth to any location on the media
– e.g. disk for word file
• Character Device
– Sequential access of a stream of data (no back & forth allowed)
– e.g. keyboard, swap disk