-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
712 lines (387 loc) · 112 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
<!doctype html>
<!-- lang declares the primary content language (Simplified Chinese) for screen readers, hyphenation and search engines. -->
<html class="theme-next use-motion theme-next-mist" lang="zh-Hans">
<head>
  <!-- Encoding declaration comes first so the rest of the head is decoded correctly. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <!-- maximum-scale is deliberately omitted: capping the zoom level prevents users from enlarging the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="/vendors/fancybox/source/jquery.fancybox.css?v=2.1.5">
  <link rel="stylesheet" href="/css/main.css?v=0.4.3">
  <meta name="keywords" content="Hexo,next">
  <!-- "shortcut icon" is the legacy-compatible spelling; the previous "shorticon" token was a typo. -->
  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=0.4.3">
  <!--
    Global theme configuration object.
    NOTE(review): presumably read by theme scripts loaded later in the page; those scripts are not visible here.
  -->
  <script id="hexo.configuration">
    var CONFIG = {
      scheme: 'Mist',
      analytics: {
        google: ''
      },
      sidebar: 'hide'
    };
  </script>
  <title>wangzhilong's blog</title>
</head>
<body>
<!--[if lte IE 8]>
<div style=' clear: both; height: 59px; padding:0 0 0 15px; position: relative;margin:0 auto;'>
<a href="http://windows.microsoft.com/en-US/internet-explorer/products/ie/home?ocid=ie6_countdown_bannercode">
<img src="http://7u2nvr.com1.z0.glb.clouddn.com/picouterie.jpg" border="0" height="42" width="820"
alt="You are using an outdated browser. For a faster, safer browsing experience, upgrade for free today or use other browser ,like chrome firefox safari."
style='margin-left:auto;margin-right:auto;display: block;'/>
</a>
</div>
<![endif]-->
<div class="container one-column
page-home
">
<div class="headband"></div>
<!-- Site header: brand link (site title) followed by the primary navigation menu. -->
<div class="header" id="header">
  <div class="header-inner">
    <h1 class="site-meta">
      <span class="logo-line-before"><i></i></span>
      <a class="brand" href="/">
        <span class="logo">
          <!-- Decorative logo glyph; the visible site title below provides the link's name. -->
          <i class="icon-logo" aria-hidden="true"></i>
        </span>
        <span class="site-title">wangzhilong's blog</span>
      </a>
      <span class="logo-line-after"><i></i></span>
    </h1>
    <div class="site-nav-toggle">
      <!--
        Icon-only control: type="button" stops it acting as a submit button if it is ever placed in a form,
        and aria-label gives it an accessible name.
        NOTE(review): presumably toggles .site-nav via a theme script; the handler is not visible here.
      -->
      <button type="button" aria-label="切换导航菜单">
        <span class="btn-bar"></span>
        <span class="btn-bar"></span>
        <span class="btn-bar"></span>
      </button>
    </div>
    <div class="site-nav">
      <ul class="menu" id="menu">
        <li class="menu-item menu-item-home">
          <a href="/">
            <i class="menu-item-icon icon-home" aria-hidden="true"></i> <br>
            首页
          </a>
        </li>
        <li class="menu-item menu-item-archives">
          <a href="/archives">
            <i class="menu-item-icon icon-archives" aria-hidden="true"></i> <br>
            归档
          </a>
        </li>
        <li class="menu-item menu-item-tags">
          <a href="/tags">
            <i class="menu-item-icon icon-tags" aria-hidden="true"></i> <br>
            标签
          </a>
        </li>
      </ul>
    </div>
  </div>
</div>
<div id="main" class="main">
<div class="main-inner">
<div id="content" class="content">
<div id="posts" class="posts-expand">
<div class="post post-type-normal ">
<!-- Post header: title link plus publication date and comment-count link. -->
<div class="post-header">
  <h1 class="post-title">
    <a class="post-title-link" href="/2015/09/30/epoll/">
      epoll源码学习
    </a>
  </h1>
  <div class="post-meta">
    <span class="post-time">
      <!-- <time> exposes the publication date in machine-readable form. -->
      发表于 <time datetime="2015-09-30">2015-09-30</time>
    </span>
    <span class="post-comments-count">
      |
      <!-- NOTE(review): the inner span is empty in the markup; presumably a comment-count script fills it using data-thread-key. Confirm. -->
      <a href="/2015/09/30/epoll/#comments">
        <span class="post-comments-count ds-thread-count" data-thread-key="2015/09/30/epoll/"></span>
      </a>
    </span>
  </div>
</div>
<div class="post-body">
<p>源码位置:fs/eventpoll.c</p>
<h4 id="函数使用">函数使用</h4><p>函数使用如下。源码分析中也是如此,只保留了关键代码语句。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">epoll_create(size);</span><br><span class="line"></span><br><span class="line"><span class="keyword">while</span> (...)</span><br><span class="line">{</span><br><span class="line"> <span class="comment">/* 等待就绪连接 */</span></span><br><span class="line"> epoll_wait( ... );</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* 如有新连接,构造epoll_event结构体后 */</span></span><br><span class="line"> epoll_ctl( ... EPOLL_CTL_ADD ... );</span><br><span class="line"> <span class="comment">/* 如有断开连接 */</span></span><br><span class="line"> epoll_ctl( ... EPOLL_CTL_DEL ... );</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<h4 id="数据结构关系">数据结构关系</h4><p>epoll涉及数据结构关系如下图所示:</p>
<h2><img src="http://7xjw4u.com1.z0.glb.clouddn.com/epoll%E6%96%87%E4%BB%B6%E7%BB%93%E6%9E%84.png" alt="epoll文件结构"></h2><h4 id="源码分析">源码分析</h4><p>(1) 函数<em><code>int epoll_create(int size)</code></em><br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="function">asmlinkage <span class="keyword">long</span> <span class="title">sys_epoll_create</span><span class="params">(<span class="keyword">int</span> size)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> error, fd = -<span class="number">1</span>;</span><br><span class="line"> <span class="keyword">struct</span> eventpoll *ep;</span><br><span class="line"> <span class="keyword">struct</span> inode *inode;</span><br><span class="line"> <span class="keyword">struct</span> file *file;</span><br><span class="line"> ...</span><br><span class="line"> <span class="comment">// 为ep分配内存并进行初始化</span></span><br><span class="line"> <span class="keyword">if</span> (size <= <span class="number">0</span> || (error = ep_alloc(&ep)) != <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">goto</span> error_return;</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 调用anon_inode_getfd新建一个file instance,也就是epoll可以看成一个文件(匿名文件)</span><br><span class="line"> * 
epoll所管理的所有的fd都是放在一个大的结构eventpoll(红黑树)中,</span><br><span class="line"> * 将主结构体struct eventpoll *ep放入file->private_data项中进行保存(sys_epoll_ctl会取用)</span><br><span class="line"> */</span></span><br><span class="line"> error = anon_inode_getfd(&fd, &inode, &file, <span class="string">"[eventpoll]"</span>,</span><br><span class="line"> &eventpoll_fops, ep);</span><br><span class="line"> <span class="keyword">return</span> fd;</span><br></pre></td></tr></table></figure></p>
<p>该函数创建一个epoll句柄,size用来告诉内核这个监听的数目一共有多大。当创建好epoll句柄后,它就会占用一个fd值,所以在使用完epoll后,必须调用close()关闭,否则可能导致fd被耗尽。<br>代码结构比较清晰,调用ep_alloc分配一个eventpoll结构,调用anon_inode_getfd创建一个文件节点和文件描述符,并返回文件描述符,这个文件描述符供epoll自己使用。</p>
<hr>
<p>(2)函数<code>int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)</code><br>epoll的事件注册函数,它不同于select()是在监听事件时告诉内核要监听什么类型的事件,而是在这里先注册要监听的事件类型。第一个参数是epoll_create的返回值,第二个参数表示动作,用三个宏来表示:<br><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">EPOLL_CTL_ADD; // 注册新的fd到epfd中</span><br><span class="line">EPOLL_CTL_MOD; // 修改已经注册的fd的监听事件</span><br><span class="line">EPOLL_CTL_DEL; // 从epfd中删除一个fd</span><br></pre></td></tr></table></figure></p>
<p>第三个参数是需要监听的fd,第四个参数是告诉内核需要监听什么事,struct epoll_event的结构如下:<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">struct</span> epoll_event{</span><br><span class="line"> <span class="keyword">__uint32_t</span> events; <span class="comment">/* epoll events */</span></span><br><span class="line"> <span class="keyword">epoll_data_t</span> data; <span class="comment">/* user data variable */</span></span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br></pre></td><td class="code"><pre><span class="line"><span class="function">asmlinkage <span class="keyword">long</span> <span class="title">sys_epoll_ctl</span><span class="params">(<span class="keyword">int</span> epfd, <span class="keyword">int</span> op, <span class="keyword">int</span> fd,</span><br><span class="line"> <span class="keyword">struct</span> epoll_event __user *event)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> error;</span><br><span class="line"> <span class="keyword">struct</span> file *file, *tfile;</span><br><span class="line"> <span class="keyword">struct</span> eventpoll *ep;</span><br><span class="line"> <span class="keyword">struct</span> epitem *epi;</span><br><span class="line"> <span class="keyword">struct</span> epoll_event epds;</span><br><span class="line"> ...</span><br><span class="line"> <span class="comment">/* 判断参数的合法性,将__user *event复制给epds 
*/</span></span><br><span class="line"> <span class="keyword">if</span> (ep_op_has_event(op) &&</span><br><span class="line"> copy_from_user(&epds, event, <span class="keyword">sizeof</span>(<span class="keyword">struct</span> epoll_event)))</span><br><span class="line"> <span class="keyword">goto</span> error_return;</span><br><span class="line"> file = fget(epfd); <span class="comment">// epoll fd 对应的文件对象</span></span><br><span class="line"> tfile = fget(fd); <span class="comment">// fd 对应的文件对象</span></span><br><span class="line"> ep = file->private_data;</span><br><span class="line"> mutex_lock(&ep->mtx);</span><br><span class="line"> <span class="comment">/* 防止重复添加(在ep的红黑树中查找是否已存在这个fd) */</span></span><br><span class="line"> epi = ep_find(ep, tfile, fd);</span><br><span class="line"> <span class="keyword">switch</span> (op) {</span><br><span class="line"> <span class="keyword">case</span> EPOLL_CTL_ADD:</span><br><span class="line"> <span class="keyword">if</span> (!epi) {</span><br><span class="line"> epds.events |= POLLERR | POLLHUP;</span><br><span class="line"> <span class="comment">// 在ep的红黑树中插入这个fd对应的epitem结构体</span></span><br><span class="line"> error = ep_insert(ep, &epds, tfile, fd);</span><br><span class="line"> }</span><br><span class="line"> ...</span><br><span class="line"> }</span><br><span class="line"> mutex_unlock(&ep->mtx);</span><br><span class="line"> ...</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
<p>去除错误检测,剩下的代码也比较清晰,首先取出epoll_create分配的eventpoll结构ep,然后使用ep_find在ep中查找当前操作的文件描述符,接下来有个判断,分不同操作进行,如果是EPOLL_CTL_ADD,则调ep_insert插入文件描述符,如果是EPOLL_CTL_DEL则调用ep_remove删除文件描述符,修改则用ep_modify。</p>
<p>下面我们进入<em><code>ep_insert()</code></em>函数中<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span 
class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">ep_insert</span><span class="params">(<span class="keyword">struct</span> eventpoll *ep, <span class="keyword">struct</span> epoll_event *event, <span class="keyword">struct</span> file *tfile, <span class="keyword">int</span> fd)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> error, revents, pwake = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> flags;</span><br><span class="line"> <span class="keyword">struct</span> epitem *epi;</span><br><span class="line"> <span class="keyword">struct</span> ep_pqueue epq;</span><br><span class="line"> </span><br><span class="line"> <span class="comment">// 分配一个epitem结构体来保存每个加入的fd</span></span><br><span class="line"> <span class="keyword">if</span> (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))</span><br><span class="line"> <span class="keyword">goto</span> error_return;</span><br><span class="line"> <span class="comment">// 初始化该结构体</span></span><br><span class="line"> ep_rb_initnode(&epi->rbn);</span><br><span class="line"> INIT_LIST_HEAD(&epi->rdllink);</span><br><span class="line"> INIT_LIST_HEAD(&epi->fllink);</span><br><span class="line"> INIT_LIST_HEAD(&epi->pwqlist);</span><br><span class="line"> epi->ep = ep;</span><br><span class="line"> ep_set_ffd(&epi->ffd, tfile, fd);</span><br><span class="line"> epi->event = *event;</span><br><span class="line"> epi->nwait = <span class="number">0</span>;</span><br><span class="line"> epi->next = EP_UNACTIVE_PTR;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* Initialize the poll table using the queue callback */</span></span><br><span 
class="line"> epq.epi = epi;</span><br><span class="line"> <span class="comment">// 安装poll回调函数</span></span><br><span class="line"> init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 调用poll函数来获取当前事件位,其实是利用它来调用注册函数</span><br><span class="line"> * ep_ptable_queue_proc (poll_wait中调用)</span><br><span class="line"> * 如果fd是套接字, f_op为socket_file_ops,poll函数是sock_poll()。</span><br><span class="line"> * 如果是TCP套接字的话,进而会调用tcp_poll()函数。此处调用poll函数查看</span><br><span class="line"> * 当前文件描述符的状态,存储在revents中。</span><br><span class="line"> * 在poll的处理函数tcp_poll()中, 会调用sock_poll_wait()。在sock_poll_wait中</span><br><span class="line"> * 会调用epq.pt.qproc指向的函数,也就是ep_ptable_queue_proc()</span><br><span class="line"> */</span></span><br><span class="line"> revents = tfile->f_op->poll(tfile, &epq.pt);</span><br><span class="line"> ...</span><br><span class="line"> ep_rbtree_insert(ep, epi); <span class="comment">// 将该epi插入到ep的红黑树中</span></span><br><span class="line"> <span class="comment">// revents & event->events: 刚才fop->poll的返回值中标识的事件有用户event关心的事件发生</span></span><br><span class="line"> <span class="comment">// !ep_is_linked(&epi->rdllink): epi的ready队列中有数据.ep_is_linked用于判断队列是否为空。</span></span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 如果要监视的文件状态已经就绪并且还没有加入到就绪队列中,则将当前的epitem加入到就绪</span><br><span class="line"> * 队列中。如果有进程正在等待该文件的状态就绪,则唤醒一个等待的进程。</span><br><span class="line"> */</span></span><br><span class="line"> <span class="keyword">if</span> ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {</span><br><span class="line"> <span class="keyword">list_add_t</span>ail(&epi->rdllink, &ep->rdllist); </span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 如果有进程正在等待文件的状态就绪,也就是调用epoll_wait睡眠的进程正在</span><br><span class="line"> * 等待,则唤醒一个等待进程</span><br><span class="line"> * waitqueue_active(q) 等待队列q中有等待的进程返回1,否则返回0。</span><br><span 
class="line"> */</span></span><br><span class="line"> <span class="keyword">if</span> (waitqueue_active(&ep->wq))</span><br><span class="line"> __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);</span><br><span class="line"> <span class="keyword">if</span> (waitqueue_active(&ep->wq))</span><br><span class="line"> __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 如果有进程等待eventpoll文件本身的事件就绪,则增加临时变量pwake的值,</span><br><span class="line"> * pwake的值不为0时,在释放lock后,会唤醒等待进程</span><br><span class="line"> */</span></span><br><span class="line"> <span class="keyword">if</span> (waitqueue_active(&ep->poll_wait))</span><br><span class="line"> pwake++;</span><br><span class="line"> }</span><br><span class="line"> ...</span><br></pre></td></tr></table></figure></p>
<p>在插入函数中<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);</span><br><span class="line">revents = tfile->f_op->poll(tfile, &epq.pt);</span><br></pre></td></tr></table></figure></p>
<p>这两个函数将ep_ptable_queue_proc注册到epq.pt中的qproc。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">typedef</span> <span class="keyword">struct</span> <span class="keyword">poll_t</span>able_struct {</span><br><span class="line">poll_queue_proc qproc;</span><br><span class="line"><span class="keyword">unsigned</span> <span class="keyword">long</span> key;</span><br><span class="line">}<span class="keyword">poll_t</span>able;</span><br></pre></td></tr></table></figure></p>
<p>执行f_op->poll(tfile, &epq.pt)时,XXX_poll(tfile, &epq.pt)函数会执行poll_wait(),poll_wait()会调用epq.pt.qproc函数,即ep_ptable_queue_proc。<br>ep_ptable_queue_proc函数如下:<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">void</span> <span class="title">ep_ptable_queue_proc</span><span class="params">(<span class="keyword">struct</span> file *file, wait_queue_head_t *whead, poll_table *pt)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">struct</span> epitem *epi = ep_item_from_epqueue(pt);</span><br><span class="line"> <span class="keyword">struct</span> eppoll_entry *pwq;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (epi->nwait >= <span class="number">0</span> && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {</span><br><span class="line"> init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);</span><br><span class="line"> pwq->whead = whead;</span><br><span class="line"> pwq->base = epi;</span><br><span class="line"> add_wait_queue(whead, &pwq->wait);</span><br><span class="line"> <span class="keyword">list_add_t</span>ail(&pwq->llink, &epi->pwqlist);</span><br><span class="line"> epi->nwait++;</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> <span 
class="comment">/* We have to signal that an error occurred */</span></span><br><span class="line"> epi->nwait = -<span class="number">1</span>;</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<p>在ep_ptable_queue_proc函数中,引入了另外一个非常重要的数据结构eppoll_entry。eppoll_entry主要完成epitem和epitem事件发生时的callback(ep_poll_callback)函数之间的关联。首先将eppoll_entry的whead指向fd的设备等待队列(同select中的wait_address),然后初始化eppoll_entry的base变量指向epitem,最后通过add_wait_queue将eppoll_entry挂载到fd的设备等待队列上。完成这个动作后,eppoll_entry已经被挂载到fd的设备等待队列。</p>
<p>由于ep_ptable_queue_proc函数设置了等待队列的ep_poll_callback回调函数。所以在设备硬件数据到来时,硬件中断处理函数中会唤醒该等待队列上等待的进程时,会调用唤醒函数ep_poll_callback。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">ep_poll_callback</span><span class="params">(wait_queue_t *wait, <span class="keyword">unsigned</span> mode, <span class="keyword">int</span> sync, <span class="keyword">void</span> *key)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> pwake = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> flags;</span><br><span class="line"> <span class="keyword">struct</span> epitem *epi = ep_item_from_wait(wait);</span><br><span class="line"> <span class="keyword">struct</span> eventpoll *ep = epi->ep;</span><br><span class="line"> ...</span><br><span class="line"> <span class="comment">//***关键***,将该fd加入到epoll监听的就绪链表中</span></span><br><span class="line"> <span class="keyword">list_add_t</span>ail(&epi->rdllink, &ep->rdllist);</span><br><span class="line"> ...</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<p>所以ep_poll_callback函数主要的功能是:当被监视文件的等待事件就绪时,将文件对应的epitem实例添加到就绪队列中。</p>
<hr>
<p>(3) 函数 <code>int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)</code><br>等待事件的产生,类似于select()函数调用。参数events用来从内核得到事件的集合,maxevents告诉内核这个events有多大,这个maxevents的值不能大于创建epoll_create()时的size,参数timeout是超时时间。该函数返回需要处理的事件数目,如果返回0表示超时。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="function">asmlinkage <span class="keyword">long</span> <span class="title">sys_epoll_wait</span><span class="params">(<span class="keyword">int</span> epfd, <span class="keyword">struct</span> epoll_event __user *events, <span class="keyword">int</span> maxevents, <span class="keyword">int</span> timeout)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> error;</span><br><span class="line"> <span class="keyword">struct</span> file *file;</span><br><span class="line"> <span class="keyword">struct</span> eventpoll *ep;</span><br><span class="line"> ...</span><br><span class="line"> file = fget(epfd);</span><br><span class="line"> ep = file->private_data;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* Time to fish for events ... */</span></span><br><span class="line"> error = ep_poll(ep, events, maxevents, timeout);</span><br><span class="line"> ...</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<p>该函数很简单,主要通过调用ep_poll获取结果。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">ep_poll</span><span class="params">(<span class="keyword">struct</span> eventpoll *ep, <span class="keyword">struct</span> epoll_event __user *events,</span><br><span class="line"> <span class="keyword">int</span> maxevents, <span class="keyword">long</span> timeout)</span></span><br><span class="line"></span>{</span><br><span class="line"> <span class="keyword">int</span> res, eavail;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> flags;</span><br><span class="line"> <span class="keyword">long</span> 
jtimeout;</span><br><span class="line"> <span class="keyword">wait_queue_t</span> wait;</span><br><span class="line"> ...</span><br><span class="line"> <span class="comment">/* 没有事件,所以需要睡眠。当有事件到来时,睡眠会被ep_poll_callback函数唤醒 */</span></span><br><span class="line"> <span class="keyword">if</span> (list_empty(&ep->rdllist)) {</span><br><span class="line"> <span class="comment">/* 将current进程放在wait这个等待队列中 */</span></span><br><span class="line"> init_waitqueue_entry(&wait, current);</span><br><span class="line"> wait.flags |= WQ_FLAG_EXCLUSIVE;</span><br><span class="line"> <span class="comment">/* 将当前进程加入到eventpoll的等待队列中,等待文件</span><br><span class="line"> * 状态就绪或直到超时,或被信号中断</span><br><span class="line"> */</span></span><br><span class="line"> __add_wait_queue(&ep->wq, &wait); </span><br><span class="line"> <span class="keyword">for</span> (;;) {</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 执行ep_poll_callback()唤醒时应当需要将当前进程唤醒,</span><br><span class="line"> * 所以当前进程状态应该为"可唤醒":TASK_INTERRUPTIBLE</span><br><span class="line"> */</span></span><br><span class="line"> set_current_state(TASK_INTERRUPTIBLE);</span><br><span class="line"> <span class="comment">/* 如果就绪队列不为空,也就是说已经有文件的状态就绪或者超时,则退出循环 */</span></span><br><span class="line"> <span class="keyword">if</span> (!list_empty(&ep->rdllist) || !jtimeout)</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> <span class="keyword">if</span> (signal_pending(current)) {</span><br><span class="line"> res = -EINTR;</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> }</span><br><span class="line"> spin_unlock_irqrestore(&ep->lock, flags);</span><br><span class="line"> jtimeout = <span class="keyword">schedule_t</span>imeout(jtimeout);</span><br><span class="line"> spin_lock_irqsave(&ep->lock, flags);</span><br><span class="line"> }</span><br><span class="line"> ...</span><br><span class="line"> <span 
class="keyword">if</span> (!res && eavail && !(res = ep_send_events(ep, events, maxevents)) && jtimeout)</span><br><span class="line"> <span class="keyword">goto</span> retry;</span><br></pre></td></tr></table></figure></p>
<p>ep_send_events函数向用户空间发送就绪事件。<br>ep_send_events()函数将用户传入的内存简单封装到ep_send_events_data结构中,然后调用ep_scan_ready_list()将就绪队列中的事件传入用户空间的内存。用户空间访问这个结果,进行处理。</p>
<hr>
<h4 id="总结">总结</h4><p>通过上面的代码浏览,大致清楚了epoll的逻辑:<br>(1) 通过epoll_create构建了一个文件结构,后续的所有操作都是在这个文件基础上。因此也就避免了select中在用户空间和内核空间之间的来回拷贝。<br>(2) epoll_ctl在插入事件时,也为该事件添加了回调函数,当该事件发生时,会被插入就绪队列中。因此也就避免了select对全部事件的遍历。<br>(3) epoll_wait只是返回就绪队列中的事件。</p>
</div>
<div class="post-footer">
<div class="post-eof"></div>
</div>
</div>
<div class="post post-type-normal ">
<div class="post-header">
<h1 class="post-title">
<a class="post-title-link" href="/2015/09/16/django-static/">
django访问静态文件遇到的问题
</a>
</h1>
<div class="post-meta">
<span class="post-time">
发表于 2015-09-16
</span>
<span class="post-comments-count">
|
<a href="/2015/09/16/django-static/#comments" >
<span class="post-comments-count ds-thread-count" data-thread-key="2015/09/16/django-static/"></span>
</a>
</span>
</div>
</div>
<div class="post-body">
<p>在用django做管理后台时,遇到了下面的问题:<br><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">[<span class="number">16</span>/Sep/<span class="number">2015</span> <span class="number">15</span>:<span class="number">58</span>:<span class="number">38</span>] <span class="string">"GET /video/static/admin/css/base.css HTTP/1.1"</span> <span class="number">404</span> <span class="number">105</span></span><br><span class="line">[<span class="number">16</span>/Sep/<span class="number">2015</span> <span class="number">15</span>:<span class="number">58</span>:<span class="number">38</span>] <span class="string">"GET /video/static/admin/css/changelists.css HTTP/1.1"</span> <span class="number">404</span> <span class="number">112</span></span><br><span class="line">[<span class="number">16</span>/Sep/<span class="number">2015</span> <span class="number">15</span>:<span class="number">58</span>:<span class="number">38</span>] <span class="string">"GET /video/static/admin/js/core.js HTTP/1.1"</span> <span class="number">404</span> <span class="number">103</span></span><br><span class="line">[<span class="number">16</span>/Sep/<span class="number">2015</span> <span class="number">15</span>:<span class="number">58</span>:<span class="number">38</span>] <span class="string">"GET /video/static/common/change_list.css?v=20150331 HTTP/1.1"</span> <span class="number">404</span> <span class="number">109</span></span><br></pre></td></tr></table></figure></p>
<p>原因:这些文件是django库自带的一些文件,并没有在我们服务的目录下面,因此找不到。<br>下面会对python访问静态文件做一些说明。<br>需要配置一些文件<br>(1) django_urls.py<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">urlpatterns = patterns(<span class="string">''</span>,</span><br><span class="line"> url(<span class="string">r'^video/admin/'</span>, include(admin.site.urls)),</span><br><span class="line"> url(<span class="string">r'^video/(?P<path>.*)$'</span>, <span class="string">'pyutil.django.views.static.serve'</span>,</span><br><span class="line"> {<span class="string">'document_root'</span>: <span class="string">'.'</span>}),</span><br><span class="line">)</span><br></pre></td></tr></table></figure></p>
<p>document_root的值就是静态文件所在的目录,这里设置为当前目录,即工程所在的目录,在我的程序中就是video。<br>例如,访问:host/video/static/html/index.html,就会返回video/static/html文件夹下的index.html。<br>(2) settings.py<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">BASE_DIR = os.path.dirname(os.path.dirname(__file__))</span><br><span class="line">STATIC_URL = <span class="string">'/video/static/'</span></span><br><span class="line">STATIC_ROOT = <span class="string">'static/'</span></span><br><span class="line">STATICFILES_DIRS = (</span><br><span class="line">os.path.join(BASE_DIR, <span class="string">"webroot/static"</span>),</span><br><span class="line">)</span><br></pre></td></tr></table></figure></p>
<p>其中,<strong><em>STATIC_URL</em></strong>是我们访问静态文件的路由。这里需要说明下<strong><em>STATIC_ROOT</em></strong>和<strong><em>STATICFILES_DIRS</em></strong>的关系。<br><strong><em>STATICFILES_DIRS</em></strong>是存放app对应的静态文件目录。我们的服务是其中的一个app,这个目录也就是存放我的服务的静态文件。<br>配置中,还有其他的app,如下。每个app的静态文件都存放在相应的<strong><em>STATICFILES_DIRS</em></strong>目录中。<br><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line">INSTALLED_APPS = (</span><br><span class="line"><span class="string">'django.contrib.admin'</span>,</span><br><span class="line"><span class="string">'django.contrib.auth'</span>,</span><br><span class="line"><span class="string">'django.contrib.contenttypes'</span>,</span><br><span class="line"><span class="string">'django.contrib.sessions'</span>,</span><br><span class="line"><span class="string">'django.contrib.messages'</span>,</span><br><span class="line"><span class="string">'django.contrib.staticfiles'</span>,</span><br><span class="line"><span class="string">'video.djangosite.service'</span>,</span><br><span class="line">)</span><br></pre></td></tr></table></figure></p>
<p>当 settings.py中DEBUG = True 时,<strong>Django会自动到各个app中找到放在里面的静态文件</strong>。<br>如果执行 <code>python manage.py collectstatic</code>,django就会收集各个app的STATICFILES_DIRS目录下的文件放到STATIC_ROOT中。</p>
<blockquote>
<p><em>注:STATIC_ROOT不能与STATICFILES_DIRS相同,也不能是其子目录。否则就会报错:“ImproperlyConfigured: The STATICFILES_DIRS setting should not contain the STATIC_ROOT setting”</em></p>
</blockquote>
<p>针对最初的问题,我们可以在DEBUG模式下运行程序,或者收集静态文件到当前工程的目录下。</p>
</div>
<div class="post-footer">
<div class="post-eof"></div>
</div>
</div>
<div class="post post-type-normal ">
<div class="post-header">
<h1 class="post-title">
<a class="post-title-link" href="/2015/09/01/select/">
select源码学习
</a>
</h1>
<div class="post-meta">
<span class="post-time">
发表于 2015-09-01
</span>
<span class="post-comments-count">
|
<a href="/2015/09/01/select/#comments" >
<span class="post-comments-count ds-thread-count" data-thread-key="2015/09/01/select/"></span>
</a>
</span>
</div>
</div>
<div class="post-body">
<!-- toc -->
<ul>
<li><a href="#函数原型">函数原型</a></li>
<li><a href="#源码分析">源码分析</a><ul>
<li><a href="#入口函数sys_select">入口函数:sys_select</a></li>
<li><a href="#函数core_sys_select">函数core_sys_select</a></li>
<li><a href="#函数do_select">函数do_select</a></li>
</ul>
</li>
<li><a href="#总结">总结</a></li>
</ul>
<!-- tocstop -->
<h4 id="函数原型">函数原型</h4><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">/*</span><br><span class="line"> * maxfdp1: 指定待检测的描述符的个数</span><br><span class="line"> * readset: 读描述符集合</span><br><span class="line"> * writeset: 写描述符集合</span><br><span class="line"> * exceptset: 异常描述符集合</span><br><span class="line"> * timeout: 超时时间(一直、指定时间、不等待)</span><br><span class="line"> *</span><br><span class="line"> * 返回值:</span><br><span class="line"> * 0: 超时</span><br><span class="line"> * -1: 错误</span><br><span class="line"> * >0: 可进行读、写、异常操作的描述符个数。</span><br><span class="line"> */</span></span><br><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">select</span><span class="params">(<span class="keyword">int</span> maxfdp1,fd_set *readset,fd_set *writeset,fd_set *exceptset,<span class="keyword">const</span> <span class="keyword">struct</span> timeval *timeout)</span></span></span><br></pre></td></tr></table></figure>
<p>上面的几个参数都是输入/输出参数,既传递输入值,也存储返回结果。</p>
<hr>
<h4 id="源码分析">源码分析</h4><p>源码位置:fs/select.c</p>
<h5 id="入口函数sys_select">入口函数:sys_select</h5><p>我们在程序中调用select函数时,系统调用该函数。该函数只是处理了下超时时间参数,然后调用<code>core_sys_select</code>函数,最后计算剩余时间值。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br></pre></td><td class="code"><pre><span class="line"><span class="function">asmlinkage <span 
class="keyword">long</span> <span class="title">sys_select</span><span class="params">(<span class="keyword">int</span> n, fd_set __user *inp, fd_set __user *outp,fd_set __user *<span class="built_in">exp</span>, <span class="keyword">struct</span> timeval __user *tvp)</span></span><br><span class="line"></span>{</span><br><span class="line"> s64 timeout = -<span class="number">1</span>;</span><br><span class="line"> <span class="keyword">struct</span> timeval tv;</span><br><span class="line"> <span class="keyword">int</span> ret;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (tvp) {</span><br><span class="line"> <span class="comment">/* 将超时时间拷贝到内核空间 */</span></span><br><span class="line"> <span class="keyword">if</span> (copy_from_user(&tv, tvp, <span class="keyword">sizeof</span>(tv)))</span><br><span class="line"> <span class="keyword">return</span> -EFAULT;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* 非法时间 */</span></span><br><span class="line"> <span class="keyword">if</span> (tv.tv_sec < <span class="number">0</span> || tv.tv_usec < <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">return</span> -EINVAL;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* Cast to u64 to make GCC stop complaining */</span></span><br><span class="line"> <span class="keyword">if</span> ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)</span><br><span class="line"> timeout = -<span class="number">1</span>; <span class="comment">/* infinite */</span></span><br><span class="line"> <span class="keyword">else</span> {</span><br><span class="line"> <span class="comment">/* 计算出超时时间(转换为时钟周期数) */</span></span><br><span class="line"> timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);</span><br><span class="line"> timeout += tv.tv_sec * HZ;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> 
</span><br><span class="line"> ret = core_sys_select(n, inp, outp, <span class="built_in">exp</span>, &timeout);</span><br><span class="line"> <span class="keyword">if</span> (tvp) {</span><br><span class="line"> <span class="keyword">struct</span> timeval rtv;</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (current->personality & STICKY_TIMEOUTS)</span><br><span class="line"> <span class="keyword">goto</span> sticky;</span><br><span class="line"> <span class="comment">/* rtv 是剩余值*/</span></span><br><span class="line"> rtv.tv_usec = <span class="keyword">jiffies_t</span>o_usecs(do_div((*(u64*)&timeout), HZ));</span><br><span class="line"> rtv.tv_sec = timeout;</span><br><span class="line"> <span class="keyword">if</span> (timeval_compare(&rtv, &tv) >= <span class="number">0</span>)</span><br><span class="line"> rtv = tv;</span><br><span class="line"> <span class="keyword">if</span> (<span class="keyword">copy_t</span>o_user(tvp, &rtv, <span class="keyword">sizeof</span>(rtv))) {</span><br><span class="line">sticky:</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * If an application puts its timeval in read-only</span><br><span class="line"> * memory, we don't want the Linux-specific update to</span><br><span class="line"> * the timeval to cause a fault after the select has</span><br><span class="line"> * completed successfully. 
However, because we're not</span><br><span class="line"> * updating the timeval, we can't restart the system</span><br><span class="line"> * call.</span><br><span class="line"> */</span></span><br><span class="line"> <span class="keyword">if</span> (ret == -ERESTARTNOHAND)</span><br><span class="line"> ret = -EINTR;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="keyword">return</span> ret;</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<h5 id="函数core_sys_select">函数core_sys_select</h5><p>该函数主要做了以下事情:</p>
<ol>
<li>fix传入的n值,使其不会超过文件描述符的最大值。这就是select能同时处理的连接受限的原因。</li>
<li>为描述符分配空间,将描述符从用户空间拷贝到内核空间。</li>
<li>调用<code>do_select</code>执行真正的IO复用。</li>
<li>将3的结果从内核空间拷贝到用户空间,返回给程序。<figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span 
class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">static</span> <span class="keyword">int</span> <span class="title">core_sys_select</span><span class="params">(<span class="keyword">int</span> n, fd_set __user *inp, fd_set __user *outp, fd_set __user *<span class="built_in">exp</span>, s64 *timeout)</span></span><br><span class="line"></span>{</span><br><span class="line"> fd_set_bits fds; <span class="comment">/* 参考后面的数据结构 */</span></span><br><span class="line"> <span class="keyword">void</span> *bits;</span><br><span class="line"> <span class="keyword">int</span> ret, max_fds;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">int</span> size;</span><br><span class="line"> <span class="keyword">struct</span> fdtable *fdt;</span><br><span class="line"> <span class="comment">/* Allocate small arguments on the stack to save memory and be faster */</span></span><br><span class="line"> <span class="keyword">long</span> stack_fds[SELECT_STACK_ALLOC/<span class="keyword">sizeof</span>(<span class="keyword">long</span>)];</span><br><span class="line"></span><br><span class="line"> ret = -EINVAL;</span><br><span class="line"> <span class="keyword">if</span> (n < <span class="number">0</span>)</span><br><span class="line"> <span 
class="keyword">goto</span> out_nofds;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* max_fds can increase, so grab it once to avoid race */</span></span><br><span class="line"> rcu_read_lock();</span><br><span class="line"> <span class="comment">/* 获取当前进程的文件描述符表 */</span></span><br><span class="line"> fdt = files_fdtable(current->files);</span><br><span class="line"> max_fds = fdt->max_fds;</span><br><span class="line"> rcu_read_unlock();</span><br><span class="line"> <span class="comment">/* 修正用户传入的第一个参数:fd_set中文件描述符的最大值 */</span></span><br><span class="line"> <span class="keyword">if</span> (n > max_fds)</span><br><span class="line"> n = max_fds;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * We need 6 bitmaps (in/out/ex for both incoming and outgoing),</span><br><span class="line"> * since we used fdset we need to allocate memory in units of</span><br><span class="line"> * long-words.</span><br><span class="line"> */</span></span><br><span class="line"> size = FDS_BYTES(n); <span class="comment">//n个bit位需要的字节数</span></span><br><span class="line"> bits = stack_fds;</span><br><span class="line"> <span class="comment">/* 读、写、异常,输入&输出,共需6倍大小的描述符空间 */</span></span><br><span class="line"> <span class="keyword">if</span> (size > <span class="keyword">sizeof</span>(stack_fds) / <span class="number">6</span>) {</span><br><span class="line"> <span class="comment">/* Not enough space in on-stack array; must use kmalloc */</span></span><br><span class="line"> ret = -ENOMEM;</span><br><span class="line"> bits = kmalloc(<span class="number">6</span> * size, GFP_KERNEL);</span><br><span class="line"> <span class="keyword">if</span> (!bits)</span><br><span class="line"> <span class="keyword">goto</span> out_nofds;</span><br><span class="line"> }</span><br><span class="line"> fds.in = bits;</span><br><span class="line"> fds.out = bits + size;</span><br><span class="line"> 
fds.ex = bits + <span class="number">2</span>*size;</span><br><span class="line"> fds.res_in = bits + <span class="number">3</span>*size;</span><br><span class="line"> fds.res_out = bits + <span class="number">4</span>*size;</span><br><span class="line"> fds.res_ex = bits + <span class="number">5</span>*size;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/* get_fd_set仅仅调用copy_from_user从用户空间拷贝了fd_set */</span></span><br><span class="line"> <span class="keyword">if</span> ((ret = get_fd_set(n, inp, fds.in)) ||</span><br><span class="line"> (ret = get_fd_set(n, outp, fds.out)) ||</span><br><span class="line"> (ret = get_fd_set(n, <span class="built_in">exp</span>, fds.ex)))</span><br><span class="line"> <span class="keyword">goto</span> out;</span><br><span class="line"> zero_fd_set(n, fds.res_in);</span><br><span class="line"> zero_fd_set(n, fds.res_out);</span><br><span class="line"> zero_fd_set(n, fds.res_ex);</span><br><span class="line"></span><br><span class="line"> ret = do_select(n, &fds, timeout);</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (ret < <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">goto</span> out;</span><br><span class="line"> <span class="keyword">if</span> (!ret) {</span><br><span class="line"> ret = -ERESTARTNOHAND;</span><br><span class="line"> <span class="keyword">if</span> (signal_pending(current))</span><br><span class="line"> <span class="keyword">goto</span> out;</span><br><span class="line"> ret = <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"></span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 把结果集拷贝到用户空间</span><br><span class="line"> */</span></span><br><span class="line"> <span class="keyword">if</span> (set_fd_set(n, inp, fds.res_in) ||</span><br><span class="line"> set_fd_set(n, outp, fds.res_out) ||</span><br><span class="line"> 
set_fd_set(n, <span class="built_in">exp</span>, fds.res_ex))</span><br><span class="line"> ret = -EFAULT;</span><br><span class="line"></span><br><span class="line">out:</span><br><span class="line"> <span class="keyword">if</span> (bits != stack_fds)</span><br><span class="line"> kfree(bits);</span><br><span class="line">out_nofds:</span><br><span class="line"> <span class="keyword">return</span> ret;</span><br><span class="line">}</span><br></pre></td></tr></table></figure>
</li>
</ol>
<h5 id="函数do_select">函数do_select</h5><p>该函数会遍历所有的描述符,返回有事件发生的描述符。其中涉及到poll_wait相关的进程休眠、唤醒相关的代码没有去看,但这不影响我们对select的理解。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span 
class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">int</span> <span class="title">do_select</span><span class="params">(<span class="keyword">int</span> n, fd_set_bits *fds, s64 *timeout)</span></span><br><span class="line"></span>{</span><br><span 
class="line"> <span class="keyword">struct</span> poll_wqueues table;</span><br><span class="line"> <span class="keyword">poll_t</span>able *wait;</span><br><span class="line"> <span class="keyword">int</span> retval, i;</span><br><span class="line"></span><br><span class="line"> rcu_read_lock();</span><br><span class="line"> <span class="comment">/*</span><br><span class="line"> * 根据已经打开fd的位图检查用户打开的fd,要求对应的fd必须打开,</span><br><span class="line"> * 并且返回最大的fd</span><br><span class="line"> */</span></span><br><span class="line"> retval = max_select_fd(n, fds);</span><br><span class="line"> rcu_read_unlock();</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (retval < <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">return</span> retval;</span><br><span class="line"> n = retval;</span><br><span class="line"></span><br><span class="line"> <span class="comment">/*将当前进程放入自己的等待队列table, 并将该等待队列加入到该测试表wait*/</span></span><br><span class="line"> poll_initwait(&table);</span><br><span class="line"> wait = &table.pt;</span><br><span class="line"> <span class="keyword">if</span> (!*timeout)</span><br><span class="line"> wait = NULL;</span><br><span class="line"> retval = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">for</span> (;;) {</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> *rinp, *routp, *rexp, *inp, *outp, *<span class="built_in">exp</span>;</span><br><span class="line"> <span class="keyword">long</span> <span class="keyword">__t</span>imeout;</span><br><span class="line"></span><br><span class="line"> set_current_state(TASK_INTERRUPTIBLE);</span><br><span class="line"></span><br><span class="line"> inp = fds->in; outp = fds->out; <span class="built_in">exp</span> = fds->ex;</span><br><span class="line"> rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;</span><br><span 
class="line"></span><br><span class="line"> <span class="keyword">for</span> (i = <span class="number">0</span>; i < n; ++rinp, ++routp, ++rexp) {</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> in, out, ex, all_bits, bit = <span class="number">1</span>, mask, j;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> res_in = <span class="number">0</span>, res_out = <span class="number">0</span>, res_ex = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">const</span> <span class="keyword">struct</span> file_operations *f_op = NULL;</span><br><span class="line"> <span class="keyword">struct</span> file *file = NULL;</span><br><span class="line"></span><br><span class="line"> in = *inp++; out = *outp++; ex = *<span class="built_in">exp</span>++;</span><br><span class="line"> all_bits = in | out | ex;</span><br><span class="line"> <span class="comment">/* 由于数据描述符表是用的long,所以每次i要加上long所占的bit位数 */</span></span><br><span class="line"> <span class="keyword">if</span> (all_bits == <span class="number">0</span>) {</span><br><span class="line"> i += __NFDBITS;</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> } <span class="comment">/* 有事件发生,遍历每一个bit */</span></span><br><span class="line"> <span class="keyword">for</span> (j = <span class="number">0</span>; j < __NFDBITS; ++j, ++i, bit <<= <span class="number">1</span>) {</span><br><span class="line"> <span class="keyword">int</span> fput_needed;</span><br><span class="line"> <span class="keyword">if</span> (i >= n)</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> <span class="keyword">if</span> (!(bit & all_bits))</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> file = fget_light(i, &fput_needed);</span><br><span class="line"> <span 
class="keyword">if</span> (file) {</span><br><span class="line"> f_op = file->f_op;</span><br><span class="line"> mask = DEFAULT_POLLMASK;</span><br><span class="line"> <span class="keyword">if</span> (f_op && f_op->poll)</span><br><span class="line"> mask = (*f_op->poll)(file, retval ? NULL : wait);</span><br><span class="line"> fput_light(file, fput_needed);</span><br><span class="line"> <span class="keyword">if</span> ((mask & POLLIN_SET) && (in & bit)) {</span><br><span class="line"> res_in |= bit;</span><br><span class="line"> retval++;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> ((mask & POLLOUT_SET) && (out & bit)) {</span><br><span class="line"> res_out |= bit;</span><br><span class="line"> retval++;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> ((mask & POLLEX_SET) && (ex & bit)) {</span><br><span class="line"> res_ex |= bit;</span><br><span class="line"> retval++;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> cond_resched();</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> (res_in)</span><br><span class="line"> *rinp = res_in;</span><br><span class="line"> <span class="keyword">if</span> (res_out)</span><br><span class="line"> *routp = res_out;</span><br><span class="line"> <span class="keyword">if</span> (res_ex)</span><br><span class="line"> *rexp = res_ex;</span><br><span class="line"> }</span><br><span class="line"> wait = NULL;</span><br><span class="line"> <span class="keyword">if</span> (retval || !*timeout || signal_pending(current))</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> <span class="keyword">if</span>(table.error) {</span><br><span class="line"> retval = table.error;</span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> }</span><br><span class="line"> <span 
class="keyword">if</span> (*timeout < <span class="number">0</span>) {</span><br><span class="line"> <span class="comment">/* Wait indefinitely */</span></span><br><span class="line"> <span class="keyword">__t</span>imeout = MAX_SCHEDULE_TIMEOUT;</span><br><span class="line"> } <span class="keyword">else</span> <span class="keyword">if</span> (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - <span class="number">1</span>)) {</span><br><span class="line"> <span class="comment">/* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */</span></span><br><span class="line"> <span class="keyword">__t</span>imeout = MAX_SCHEDULE_TIMEOUT - <span class="number">1</span>;</span><br><span class="line"> *timeout -= <span class="keyword">__t</span>imeout;</span><br><span class="line"> } <span class="keyword">else</span> {</span><br><span class="line"> <span class="keyword">__t</span>imeout = *timeout;</span><br><span class="line"> *timeout = <span class="number">0</span>;</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">__t</span>imeout = <span class="keyword">schedule_t</span>imeout(<span class="keyword">__t</span>imeout);</span><br><span class="line"> <span class="keyword">if</span> (*timeout >= <span class="number">0</span>)</span><br><span class="line"> *timeout += <span class="keyword">__t</span>imeout;</span><br><span class="line"> }</span><br><span class="line"> __set_current_state(TASK_RUNNING);</span><br><span class="line"></span><br><span class="line"> poll_freewait(&table);</span><br><span class="line"></span><br><span class="line"> <span class="keyword">return</span> retval;</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<h4 id="总结">总结</h4><p>select主要做了3件事情:</p>
<ol>
<li>将需要监测的描述符从用户空间拷贝到内核空间;</li>
<li>遍历描述符,返回有事件发生的描述符;</li>
<li>将发生的描述符从内核空间拷贝到用户空间。</li>
</ol>
<p>由于需要将全部描述符在内核空间和用户空间之间来回拷贝,并且逐个遍历,造成了效率不高;而且描述符的个数也受系统最大文件描述符数的限制。</p>
<hr>
<p><strong><em>一些数据结构</em></strong><br>fd_set_bits<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">typedef</span> <span class="keyword">struct</span> {</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> *in, *out, *ex;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> *res_in, *res_out, *res_ex;</span><br><span class="line">} fd_set_bits;</span><br><span class="line"></span><br><span class="line"><span class="comment">/*</span><br><span class="line"> * How many longwords for "nr" bits?</span><br><span class="line"> */</span></span><br><span class="line"> <span class="preprocessor">#<span class="keyword">define</span> FDS_BITPERLONG (8*sizeof(long))</span></span><br><span class="line"> <span class="preprocessor">#<span class="keyword">define</span> FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)</span></span><br><span class="line"> <span class="preprocessor">#<span class="keyword">define</span> FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long))</span></span><br></pre></td></tr></table></figure></p>
<p>fd_set_bits,标识出可读、可写、异常描述符表的输入和输出。<br>上面的三个宏分别表示:long类型的bit数、nr个bit位需要几个long类型存放、nr个bit位存放需要的字节数。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">typedef</span> <span class="keyword">struct</span> <span class="keyword">poll_t</span>able_struct {</span><br><span class="line"> poll_queue_proc qproc;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> key;</span><br><span class="line">} <span class="keyword">poll_t</span>able;</span><br></pre></td></tr></table></figure></p>
<p>poll_table:对每个文件进行poll操作时,判断是否能够非阻塞地进行key值(poll事件组成)标识的I/O操作;如果不能,则调用回调函数qproc将进程添加到文件的poll等待队列中。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">struct</span> <span class="keyword">poll_t</span>able_entry {</span><br><span class="line"> <span class="keyword">struct</span> file *file;</span><br><span class="line"> <span class="keyword">unsigned</span> <span class="keyword">long</span> key;</span><br><span class="line"> <span class="keyword">wait_queue_t</span> wait;</span><br><span class="line"> <span class="keyword">wait_queue_head_t</span> *wait_address;</span><br><span class="line">};</span><br></pre></td></tr></table></figure></p>
<p>poll_table_entry: 用于阻塞进程并将进程添加到文件的poll等待队列中,一个文件对应一个poll_table_entry。<br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">struct</span> poll_wqueues {</span><br><span class="line"> <span class="keyword">poll_t</span>able pt;</span><br><span class="line"> <span class="keyword">struct</span> <span class="keyword">poll_t</span>able_page *table;</span><br><span class="line"> <span class="keyword">struct</span> task_struct *<span class="keyword">polling_t</span>ask;</span><br><span class="line"> <span class="keyword">int</span> triggered;</span><br><span class="line"> <span class="keyword">int</span> error;</span><br><span class="line"> <span class="keyword">int</span> inline_index;</span><br><span class="line"> <span class="keyword">struct</span> <span class="keyword">poll_t</span>able_entry inline_entries[N_INLINE_POLL_ENTRIES];</span><br><span class="line">};</span><br></pre></td></tr></table></figure></p>
<p>poll_wqueues: 用于在select/poll时,如果需要阻塞进程,将进程添加到描述表标识的所有文件的poll等待队列中,以便任意一个文件可进行非阻塞I/O操作时唤醒进程。</p>
</div>
<div class="post-footer">
<div class="post-eof"></div>
</div>
</div>
<div class="post post-type-normal ">
<div class="post-header">
<h1 class="post-title">
<a class="post-title-link" href="/2015/08/28/socket/">
listen和accept中的socket关系
</a>
</h1>
<div class="post-meta">
<span class="post-time">
发表于 2015-08-28
</span>
<span class="post-comments-count">
|
<a href="/2015/08/28/socket/#comments" >
<span class="post-comments-count ds-thread-count" data-thread-key="2015/08/28/socket/"></span>
</a>
</span>
</div>
</div>
<div class="post-body">
<p>我们学习网络编程时,一个服务端程序通常是:socket()->bind()->listen(),当有客户端程序connect()时,服务端accept()处理。下面是我在学习中遇到的几个问题,以及理解。</p>
<h5 id="问题1客户端和服务端的socket是怎么通信的">问题1:客户端和服务端的socket是怎么通信的?</h5><p>我们知道,socket有一个四元组:(目的ip、目的port、源ip、源port)。这个四元组就可以保证客户端和服务端之间可以通信。<br>比如服务端是个监听80端口的web服务,ip是1.1.1.1。服务端启动时会生成一个socket绑定到80端口监听。这时一个ip为2.2.2.2的客户端要去访问这个服务端时,先要生成一个socket。系统会为这个socket选择一个端口,比如65535。那么客户端就以(1.1.1.1, 80, 2.2.2.2, 65535)这个四元组,通过connect()访问服务端。</p>
<h5 id="问题2accept中的socket和listen监听的socket的端口相同吗">问题2:accept中的socket和listen监听的socket的端口相同吗?</h5><p>客户端通过connect()访问服务端的80端口,通过3次握手后,进入accept队列。服务端调用accept()时,会生成一个新的socket和客户端通信,之前的socket仍然继续监听80端口。那么这个新生成的socket的端口还是80吗?答案是肯定的,否则客户端那边的socket就无法和新生成的socket通信。看accept源码,会发现新生成的socket会拷贝监听socket的信息。因此两者的端口号相同。</p>
<h5 id="问题3服务端是怎么区分客户端的信息是给listen的socket还是accept的socket的">问题3:服务端是怎么区分客户端的信息是给listen的socket还是accept的socket的?</h5><p>如果监听的socket和新生成的socket都是使用80端口,那么客户端发给80端口的信息,怎么区分是给哪个socket?是通过客户端port做路由的。listen使用的socket是没有客户端信息的,它的客户端端口为*;而accept时新生成的socket是有客户端端口号的。如下图,第一行是监听的socket,第二行是accept后生成的socket。因此通过四元组中的客户端port就可将客户端信息路由到正确的socket上。</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">wangzhilong@<span class="keyword">in</span>17-<span class="number">164</span>^:/mnt/mfs/$ netstat --tcp -lan | grep <span class="number">8787</span></span><br><span class="line">tcp <span class="number">0</span> <span class="number">0</span> <span class="number">0.0</span>.<span class="number">0.0</span>:<span class="number">8787</span> <span class="number">0.0</span>.<span class="number">0.0</span>:* LISTEN</span><br><span class="line">tcp <span class="number">0</span> <span class="number">0</span> <span class="number">10.4</span>.<span class="number">17.164</span>:<span class="number">8787</span> <span class="number">10.2</span>.<span class="number">22.31</span>:<span class="number">64285</span> TIME_WAIT</span><br></pre></td></tr></table></figure>
</div>
<div class="post-footer">
<div class="post-eof"></div>
</div>
</div>
<div class="post post-type-normal ">
<div class="post-header">
<h1 class="post-title">
<a class="post-title-link" href="/2015/08/25/kafka/">
kafka笔记
</a>
</h1>
<div class="post-meta">
<span class="post-time">
发表于 2015-08-25
</span>
<span class="post-comments-count">
|
<a href="/2015/08/25/kafka/#comments" >
<span class="post-comments-count ds-thread-count" data-thread-key="2015/08/25/kafka/"></span>
</a>
</span>
</div>
</div>
<div class="post-body">
<p>最近有使用kafka,顺便学习下kafka,这个博客写的很详细。<a href="http://www.jasongj.com/2015/01/02/Kafka%E6%B7%B1%E5%BA%A6%E8%A7%A3%E6%9E%90/" target="_blank" rel="external">http://www.jasongj.com/2015/01/02/Kafka%E6%B7%B1%E5%BA%A6%E8%A7%A3%E6%9E%90/</a><br>下面记录几条刚开始没有理解的点,方便以后回忆。</p>
<h4 id="1-topic和particion关系">1 topic和partition关系</h4><p>topic在逻辑上是一个队列,每条消息必须指定它的topic。为了使得Kafka的吞吐率可以水平扩展,物理上把topic分成一个或多个partition,每个partition在物理上对应一个文件夹,该文件夹下存储这个partition的所有消息和索引文件。<br>partition中的每条消息都会被分配一个有序的id(offset)。kafka只保证按一个partition中的顺序将消息发给consumer,不保证一个topic的整体(多个partition间)的顺序。<br>消息被发到哪个partition是通过消息中的key进行hash决定的。</p>
<h4 id="2-particion和consumer关系">2 partition和consumer关系</h4><p>一个partition最多只能由一个consumer去消费,但一个consumer可以消费多个partition。这也保证了一个partition中消息是顺序消费的,但如果consumer数量大于partition数量,多出来的consumer是浪费的。</p>
<h4 id="3-topic和consumer-groupcg关系">3 topic和consumer group(CG)关系</h4><p>这是kafka用来实现一个topic消息的广播(发给所有的consumer)和单播(发给任意一个consumer)的手段。一个topic可以有多个CG。topic的消息会复制(不是真的复制,是概念上的)到所有的CG,但每个CG只会把消息发给该CG中的一个consumer。如果需要实现广播,只要每个consumer有一个独立的CG就可以了。要实现单播只要所有的consumer在同一个CG。用CG还可以将consumer进行自由的分组而不需要多次发送消息到不同的topic。<br>即:一条消息只能被一个CG消费一次,但可以被多个CG消费。</p>
<h4 id="4-消费状态怎么记录">4 消费状态怎么记录</h4><p>Kafka会为每一个consumer group保留一些metadata信息:当前消费的消息的position,也即offset。这个offset由consumer控制。正常情况下consumer会在消费完一条消息后线性增加这个offset。当然,consumer也可将offset设成一个较小的值,重新消费一些消息。因为offset由consumer控制,所以Kafka broker是无状态的,它不需要标记哪些消息被哪些consumer消费过,不需要通过broker去保证同一个consumer group只有一个consumer能消费某一条消息,因此也就不需要锁机制,这也为Kafka的高吞吐率提供了有力保障。</p>
</div>
<div class="post-footer">
<div class="post-eof"></div>
</div>
</div>
</div>
<div class="pagination">
<span class="page-number current">1</span><a class="page-number" href="/page/2/">2</a><a class="page-number" href="/page/3/">3</a><a class="extend next" rel="next" href="/page/2/">»</a>
</div>
</div>
</div>
<div class="sidebar-toggle">
<div class="sidebar-toggle-line-wrap">
<span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
</div>
</div>
<div id="sidebar" class="sidebar">
<div class="sidebar-inner">
<div class="site-overview">
<div class="site-author motion-element">
<img class="site-author-image" src="http://7xjw4u.com1.z0.glb.clouddn.com/logo.jpg" alt="wangzhilong" />
<p class="site-author-name">wangzhilong</p>
</div>
<p class="site-description motion-element"></p>
<div class="site-state motion-element">
<div class="site-state-item site-state-posts">
<a href="/archives">
<span class="site-state-item-count">11</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<span class="site-state-item-count">1</span>
<span class="site-state-item-name">分类</span>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags">
<span class="site-state-item-count">9</span>
<span class="site-state-item-name">标签</span>
</a>
</div>
</div>
<!-- Author's external profile links. The link opens in a new tab, so
     rel="noopener noreferrer" is set to keep the opened page from reaching
     back to this one through window.opener. -->
<div class="links-of-author motion-element">
  <span class="links-of-author-item">
    <a href="http://weibo.com/wangzhilong2011" target="_blank" rel="noopener noreferrer">新浪微博</a>
  </span>
</div>
</div>
</div>
</div>
</div>
<div id="footer" class="footer">
<div class="footer-inner">
<div class="copyright">
©
2016
<span class="with-love">
<i class="icon-heart"></i>
</span>
<span class="author">wangzhilong</span>
</div>
<div class="powered-by">
由 <a class="theme-link" href="http://hexo.io">Hexo</a> 强力驱动
</div>
<div class="theme-info">
主题 -
<a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
NexT.Mist
</a>
</div>
</div>
</div>
<div class="back-to-top"></div>
</div>
<script type="text/javascript" src="/vendors/jquery/index.js?v=2.1.3"></script>
<script type="text/javascript" src="/vendors/fancybox/source/jquery.fancybox.pack.js"></script>
<script type="text/javascript" src="/js/fancy-box.js?v=0.4.3"></script>
<script type="text/javascript" src="/js/helpers.js?v=0.4.3"></script>
<script type="text/javascript" src="/vendors/velocity/velocity.min.js"></script>
<script type="text/javascript" src="/vendors/velocity/velocity.ui.min.js"></script>
<script type="text/javascript" src="/js/motion_global.js?v=0.4.3" id="motion.global"></script>
<script type="text/javascript" src="/js/search-toggle.js"></script>
<script type="text/javascript">
// Once the DOM is ready, show the sidebar if the site configuration
// asks for it to be displayed 'always'; otherwise do nothing.
$(function () {
  var sidebarPinned = CONFIG.sidebar === 'always';
  if (!sidebarPinned) {
    return;
  }
  displaySidebar();
});
</script>
<script type="text/javascript">
// Global settings object for the comment widget (presumably read by the
// embed.js script loaded below; confirm against the widget's documentation).
var duoshuoQuery = {short_name:"wangzhilong-blog"};
(function() {
  // Load the embed script over the same scheme as the current page,
  // falling back to plain http for anything that is not https.
  var scheme = 'http:';
  if (document.location.protocol == 'https:') {
    scheme = 'https:';
  }

  var embed = document.createElement('script');
  embed.type = 'text/javascript';
  embed.async = true;
  embed.src = scheme + '//static.duoshuo.com/embed.js';
  embed.charset = 'UTF-8';

  // Attach to <head> when it exists, otherwise to <body>.
  var host = document.getElementsByTagName('head')[0];
  if (!host) {
    host = document.getElementsByTagName('body')[0];
  }
  host.appendChild(embed);
})();
</script>
</body>
</html>