|
最近看linux0.12内核源代码,发现main.c中的一段程序需要反复理解才行,虽然还有一些细节没有搞懂,也先做个笔记,贴上一点实验数据,以后慢慢理解...
看main()中调用fork这里的程序:
161 hd_init();
167 }
sti()开启中断IF标志后(之前在sched_init()中设置的timer_interrupt中断门起作用了,每10ms会发出一个时间中断,但是以下代码在创建好task1之前,即使进入时间中断,并不会进行进程调度,也不会弄乱堆栈,暂时可以不考虑它),内核通过move_to_user_mode()进入task0,在move_to_user_mode()执行前的堆栈情况如下:
Stack address size 4
| STACK 0x000375c8 [0x00400000] //红色部分是进入main()后,初始化
| STACK 0x000375cc [0x00400000] //函数所push的参数,截至到sti()
| STACK 0x000375d0 [0x01000000]
| STACK 0x000375d4 [0x00000000]
| STACK 0x000375d8 [0x00000050]
| STACK 0x000375dc [0x000001cb]
| STACK 0x000375e0 [0x00005412] //main如果ret后的返回地址,L6
| STACK 0x000375e4 [0x00000000] //main的参数argc
| STACK 0x000375e8 [0x00000000] //main的参数argv
| STACK 0x000375ec [0x00000000] //main的参数envp
| STACK 0x000375f0 [0x00000000] //stack top, no data pushed
然后进入move_to_user_mode():
1 #define move_to_user_mode() \
2 __asm__ ("movl %%esp,%%eax\n\t" \
3 "pushl $0x17\n\t" \
4 "pushl %%eax\n\t" \
5 "pushfl\n\t" \
6 "pushl $0x0f\n\t" \
7 "pushl $1f\n\t" \
8 "iret\n" \
9 "1:\tmovl $0x17,%%eax\n\t" \
10 "mov %%ax,%%ds\n\t" \
11 "mov %%ax,%%es\n\t" \
12 "mov %%ax,%%fs\n\t" \
13 "mov %%ax,%%gs" \
14 :::"ax")
push进了ss(iret后进入task0,现在内核的堆栈就成为task0的用户栈),push esp(现在的栈指针也就是task0用户栈的指针),然后按照iret返回顺序继续压栈:eflags,0x0f(task0的代码段选择符,ldt表中的第1个描述符),最后是eip(即标号1:处的地址)。
运行到iret前堆栈如下(从0x000375c8开始计):
Stack address size 4
| STACK 0x000375b4 [0x00006823] //eip
| STACK 0x000375b8 [0x0000000f] //task0代码段选择符,对应ldt第一个描述符
| STACK 0x000375bc [0x00000206] //eflag
| STACK 0x000375c0 [0x000375c8] //esp
| STACK 0x000375c4 [0x00000017] //task0数据段选择符,对应ldt第二个描述符
| STACK 0x000375c8 [0x00400000]
运行完iret后堆栈如下(从0x000375c8开始计):
| STACK 0x000375c8 [0x00400000]
此时进入task0,发现用户栈回到了sti()时的状态,之所以使用inline的move_to_user_mode()方式进入task0,是因为之后会在task0中fork()出task1,而创建task1之初,task1会复制task0的大部分结构,包括用户栈,之后通过写时复制才创建自己的用户栈,那么在task0创建task1之前,不能使用自己的用户栈,否则会弄乱以后task1的用户栈,这也就是使用inline方式的原因。
接下来,task0执行以下代码(因为task0代码段与内核代码段重叠):
把数据段寄存器全部设为ldt第二个描述符
9 "1:\tmovl $0x17,%%eax\n\t" \
10 "mov %%ax,%%ds\n\t" \
11 "mov %%ax,%%es\n\t" \
12 "mov %%ax,%%fs\n\t" \
13 "mov %%ax,%%gs" \
14 :::"ax")
之后task0就开始执行fork()
if (!fork())
这里的fork()并不是glibc之类的库函数,而是main.c中通过宏_syscall0(int,fork)以inline方式展开得到的函数,它直接用int 0x80发起内核系统调用:
150 #define _syscall0(type,name) \
151 type name(void) \
152 { \
153 long __res; \
154 __asm__ volatile ("int $0x80" \
155 : "=a" (__res) \
156 : "0" (__NR_##name)); \
157 if (__res >= 0) \
158 return (type) __res; \
159 errno = -__res; \
160 return -1; \
161 }
宏展开为:
00006830: ( ): add esp, 0x0000000c ; 83c40c //为什么esp会自动加0xc?待查...
00006833: ( ): mov eax, 0x00000002 ; b802000000
00006838: ( ): int 0x80
调用int 0x80, eax=0x2(系统调用号__NR_fork)后,返回eax(__res),这里的__res也就是fork()后的返回值
int 0x80, eax=0x2后,进入_system_call,int 0x80之前的堆栈情况为(esp之前加了0xc):
Stack address size 4
| STACK 0x000375d4 [0x00000000]
int 0x80之后的堆栈情况(进入了task0的内核态,使用ss0=0x10和esp0=0x21194):
Stack address size 4
| STACK 0x00021180 [0x0000683a] //task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //task0当前cs
| STACK 0x00021188 [0x00000216] //task0当前eflag
| STACK 0x0002118c [0x000375d4] //task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
接下来执行_system_call:
83 .align 2
84 _system_call:
85 push %ds
86 push %es
87 push %fs
88 pushl %eax # save the orig_eax
89 pushl %edx
90 pushl %ecx # push %ebx,%ecx,%edx as parameters
91 pushl %ebx # to the system call
92 movl $0x10,%edx # set up ds,es to kernel space
93 mov %dx,%ds
94 mov %dx,%es
95 movl $0x17,%edx # fs points to local data space
96 mov %dx,%fs
97 cmpl _NR_syscalls,%eax
98 jae bad_sys_call
99 call _sys_call_table(,%eax,4)
100 pushl %eax
101 2:
102 movl _current,%eax
103 cmpl $0,state(%eax) # state
104 jne reschedule
105 cmpl $0,counter(%eax) # counter
106 je reschedule
先将ds,es,fs和此时的eax=0x2,edx,ecx,ebx入栈
之后堆栈情况如下:
Stack address size 4
| STACK 0x00021164 [0x00000000] //ebx
| STACK 0x00021168 [0x00055800] //ecx
| STACK 0x0002116c [0x00000021] //edx
| STACK 0x00021170 [0x00000002] //eax=中断调用号,对应sys_fork(实际之后检查的时候用的是当前寄存器eax,没有再去读这里入栈的eax)
| STACK 0x00021174 [0x00000017] //fs
| STACK 0x00021178 [0x00000017] //es
| STACK 0x0002117c [0x00000017] //ds
| STACK 0x00021180 [0x0000683a] //task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //task0当前cs
| STACK 0x00021188 [0x00000216] //task0当前eflag
| STACK 0x0002118c [0x000375d4] //task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
入栈后,把ds和es设成系统数据段0x10,fs设置成task0的ldt局部数据段
下面比较中断调用号是不是超过了87? 未超过,说明无错误,调用
_sys_call_table(,%eax,4) //&_sys_call_table+4*%eax,找到sys_fork
否则,调用bad_sys_call
调用_sys_fork之前的堆栈还是保持esp=0x21164,调用后的堆栈:
| STACK 0x00021160 [0x000078ab] //call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx
| STACK 0x00021168 [0x00055800] //ecx
| STACK 0x0002116c [0x00000021] //edx
| STACK 0x00021170 [0x00000002] //eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs
| STACK 0x00021178 [0x00000017] //es
| STACK 0x0002117c [0x00000017] //ds
进入_sys_fork:
221 .align 2
222 _sys_fork:
223 call _find_empty_process
224 testl %eax,%eax
225 js 1f
226 push %gs
227 pushl %esi
228 pushl %edi
229 pushl %ebp
230 pushl %eax
231 call _copy_process
232 addl $20,%esp
233 1: ret
call _find_empty_process,找空闲pid和task[]空闲结构体,返回task[i]中的索引i(放在eax中)
这里call也只入栈call _find_empty_process后的指令eip=0x79c1,返回后,堆栈还是不变
| STACK 0x00021160 [0x000078ab] //call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx
| STACK 0x00021168 [0x00055800] //ecx
| STACK 0x0002116c [0x00000021] //edx
| STACK 0x00021170 [0x00000002] //eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs
| STACK 0x00021178 [0x00000017] //es
| STACK 0x0002117c [0x00000017] //ds
然后检查返回的i是不是负数,不是的话继续压栈gs,esi,edi,ebp,eax(任务号i)
Stack address size 4
| STACK 0x0002114c [0x00000001] //eax(任务号i)
| STACK 0x00021150 [0x000375dc] //ebp
| STACK 0x00021154 [0x00000ffc] //edi
| STACK 0x00021158 [0x000e0000] //esi
| STACK 0x0002115c [0x00000017] //gs
| STACK 0x00021160 [0x000078ab] //call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx
| STACK 0x00021168 [0x00055800] //ecx
| STACK 0x0002116c [0x00000021] //edx
| STACK 0x00021170 [0x00000002] //eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs
| STACK 0x00021178 [0x00000017] //es
| STACK 0x0002117c [0x00000017] //ds
接着调用call _copy_process创建task1
68 int copy_process(int nr,long ebp,long edi,long esi,long gs,long none,
69 long ebx,long ecx,long edx, long orig_eax,
70 long fs,long es,long ds,
71 long eip,long cs,long eflags,long esp,long ss)
copy_process会使用栈中的参数,对应关系如下:
| STACK 0x0002114c [0x00000001] //nr->eax(任务号i)
| STACK 0x00021150 [0x000375dc] //ebp->ebp
| STACK 0x00021154 [0x00000ffc] //edi->edi
| STACK 0x00021158 [0x000e0000] //esi->esi
| STACK 0x0002115c [0x00000017] //gs->gs
| STACK 0x00021160 [0x000078ab] //none->call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
接下来copy_process执行完后,返回last_pid,在eax中(copy_process中没有入栈,在call _sys_call_table(,%eax,4)下一句就是push %eax)。task1的state为TASK_RUNNING,时钟中断会在task0与task1之间进行调度,
task1的栈情况:
内核栈:与task0不一样,ss0=0x10, esp0=PAGE_SIZE+(long) p,实际值为0x1000000
用户栈:与task0一样,先使用_stack_start
假设:在copy_process之后,来了一个时间中断,由于特权级没变,都是0,因此,不会发生堆栈切换,使用的还是task0的内核栈,进入_timer_interrupt:
188 .align 2
189 _timer_interrupt:
190 push %ds # save ds,es and put kernel data space
191 push %es # into them. %fs is used by _system_call
192 push %fs
193 pushl $-1 # fill in -1 for orig_eax
194 pushl %edx # we save %eax,%ecx,%edx as gcc doesn't
195 pushl %ecx # save those across function calls. %ebx
196 pushl %ebx # is saved as we use that in ret_sys_call
197 pushl %eax
198 movl $0x10,%eax
199 mov %ax,%ds
200 mov %ax,%es
201 movl $0x17,%eax
202 mov %ax,%fs
203 incl _jiffies
204 movb $0x20,%al # EOI to interrupt controller #1
205 outb %al,$0x20
206 movl CS(%esp),%eax
207 andl $3,%eax # %eax is CPL (0 or 3, 0=supervisor)
208 pushl %eax
209 call _do_timer # 'do_timer(long CPL)' does everything from
210 addl $4,%esp # task switching to accounting ...
211 jmp ret_from_sys_call
先按顺序保存ds,es,fs,-1(这个-1是为了表明不是系统调用,因为这是个中断),edx,ecx,ebx,eax(这时eax就是last_pid,不能丢啊),接下来累加jiffies,发送EOI清除ISR中的位(注意,Intel规定,使用interrupt gate处理中断时,IF会自动清0,也就是说在本时间中断中,不会响应可屏蔽中断,例如另一个时间中断,因此放心大胆地运行吧),下面就会调用do_timer,传入参数eax(记录之前的CPL),也会有call _do_timer后一句eip入栈,在do_timer中有这么一句:
364 if ((--current->counter)>0) return;
365 current->counter=0;
366 if (!cpl) return;
367 schedule();
也就是说,如果在内核态运行时,时钟中断不会进行调度(这大概就是Linux内核态不能抢占的原因吧)
既然不能调度,把call _do_timer下一句eip出栈,执行
210 addl $4,%esp # task switching to accounting ...
211 jmp ret_from_sys_call
esp+4后堆栈:
| STACK 0x00021124 [0x_______] //eax(CPL)
| STACK 0x00021128 [0x_______] <-esp //eax(last_pid)
| STACK 0x0002112c [0x_______] //ebx
| STACK 0x00021130 [0x_______] //ecx
| STACK 0x00021134 [0x_______] //edx
| STACK 0x00021138 [0x_______] //-1
| STACK 0x0002113c [0x_______] //fs
| STACK 0x00021140 [0x_______] //es
| STACK 0x00021144 [0x_______] //ds
时间中断返回eip
| STACK 0x00021148 [0x_______] //time_interrupt return eip
| STACK 0x0002114c [0x00000001] //nr->eax(任务号i)
| STACK 0x00021150 [0x000375dc] //ebp->ebp
| STACK 0x00021154 [0x00000ffc] //edi->edi
| STACK 0x00021158 [0x000e0000] //esi->esi
| STACK 0x0002115c [0x00000017] //gs->gs
| STACK 0x00021160 [0x000078ab] //none->call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
107 ret_from_sys_call:
108 movl _current,%eax
109 cmpl _task,%eax # task[0] cannot have signals
110 je 3f
比较当前进程是不是task0,如果是,则直接跳3f
111 cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
112 jne 3f
113 cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
114 jne 3f
115 movl signal(%eax),%ebx
116 movl blocked(%eax),%ecx
117 notl %ecx
118 andl %ebx,%ecx
119 bsfl %ecx,%ecx
120 je 3f
121 btrl %ecx,%ebx
122 movl %ebx,signal(%eax)
123 incl %ecx
124 pushl %ecx
125 call _do_signal
126 popl %ecx
127 testl %eax, %eax
128 jne 2b # see if we need to switch tasks, or do more signals
129 3: popl %eax
130 popl %ebx
131 popl %ecx
132 popl %edx
133 addl $4, %esp # skip orig_eax
134 pop %fs
135 pop %es
136 pop %ds
137 iret
从3:开始看,灰色部分一一出栈
| STACK 0x00021124 [0x_______] //eax(CPL)
| STACK 0x00021128 [0x_______] //eax<-eax(last_pid)
| STACK 0x0002112c [0x_______] //ebx<-ebx
| STACK 0x00021130 [0x_______] //ecx<-ecx
| STACK 0x00021134 [0x_______] //edx<-edx
| STACK 0x00021138 [0x_______] //-1 (addl $4,%esp跳过)
| STACK 0x0002113c [0x_______] //fs<-fs
| STACK 0x00021140 [0x_______] //es<-es
| STACK 0x00021144 [0x_______] //ds<-ds
时间中断返回eip
| STACK 0x00021148 [0x_______] //time_interrupt return eip 返回出栈
| STACK 0x0002114c [0x00000001] <-esp//nr->eax(任务号i)
| STACK 0x00021150 [0x000375dc] //ebp->ebp
| STACK 0x00021154 [0x00000ffc] //edi->edi
| STACK 0x00021158 [0x000e0000] //esi->esi
| STACK 0x0002115c [0x00000017] //gs->gs
| STACK 0x00021160 [0x000078ab] //none->call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
此时的堆栈与中断前,copy_process后一样,因此,如果copy_process后直接发生时钟中断,并不会调度task1,(因为现在运行在CPL=0!!!)。
好了,现在让我们再回到copy_process之后:
_sys_fork:
...省略n行
231 call _copy_process
232 addl $20,%esp
233 1: ret
107 ret_from_sys_call:
108 movl _current,%eax
109 cmpl _task,%eax # task[0] cannot have signals
110 je 3f
111 cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
112 jne 3f
113 cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
114 jne 3f
115 movl signal(%eax),%ebx
116 movl blocked(%eax),%ecx
117 notl %ecx
118 andl %ebx,%ecx
119 bsfl %ecx,%ecx
120 je 3f
121 btrl %ecx,%ebx
122 movl %ebx,signal(%eax)
123 incl %ecx
124 pushl %ecx
125 call _do_signal
126 popl %ecx
127 testl %eax, %eax
128 jne 2b # see if we need to switch tasks, or do more signals
129 3: popl %eax
130 popl %ebx
131 popl %ecx
132 popl %edx
133 addl $4, %esp # skip orig_eax
134 pop %fs
135 pop %es
136 pop %ds
137 iret
esp+20后:
| STACK 0x0002114c [0x00000001] //nr->eax(任务号i)
| STACK 0x00021150 [0x000375dc] //ebp->ebp
| STACK 0x00021154 [0x00000ffc] //edi->edi
| STACK 0x00021158 [0x000e0000] //esi->esi
| STACK 0x0002115c [0x00000017] //gs->gs
| STACK 0x00021160 [0x000078ab] <-esp//none->call _sys_call_table(,%eax,4) 下一条指令eip
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
99 call _sys_call_table(,%eax,4)
100 pushl %eax <-last-pid
101 2:
102 movl _current,%eax
103 cmpl $0,state(%eax) # state
104 jne reschedule
105 cmpl $0,counter(%eax) # counter
106 je reschedule
堆栈如下:
| STACK 0x00021160 [0x00000001] <-esp //eax(last_pid)
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
压入eax(copy_process返回的last_pid),注意,从前面到现在我们一直在task0中运行,还没有调度过task1,然后判断,如果current task未就绪或者时间片用完了,就运行reschedule,进行调度,这两句程序没有完全理解,待查...
接着运行ret_from_sys_call
运行到:
129 3: popl %eax
130 popl %ebx
131 popl %ecx
132 popl %edx
133 addl $4, %esp # skip orig_eax
134 pop %fs
135 pop %es
136 pop %ds
137 iret
| STACK 0x00021160 [0x00000001] //eax(__res)<-eax(last_pid)
| STACK 0x00021164 [0x00000000] //ebx->ebx
| STACK 0x00021168 [0x00055800] //ecx->ecx
| STACK 0x0002116c [0x00000021] //edx->edx
| STACK 0x00021170 [0x00000002] //orig_eax->eax=中断调用号
| STACK 0x00021174 [0x00000017] //fs->fs
| STACK 0x00021178 [0x00000017] //es->es
| STACK 0x0002117c [0x00000017] //ds->ds
以下是iret回到_syscall0(int,fork)的出栈,出栈后,任务运行在task0的用户态,
ss=0x17, esp=0x375d4
| STACK 0x00021180 [0x0000683a] //eip->task0当前eip,对应if(__res>=0)
| STACK 0x00021184 [0x0000000f] //cs->task0当前cs
| STACK 0x00021188 [0x00000216] //eflag->task0当前eflag
| STACK 0x0002118c [0x000375d4] //esp->task0当前用户栈esp
| STACK 0x00021190 [0x00000017] //ss->task0当前用户栈ss
| STACK 0x00021194 [0x00000001]
最后,返回__res(eax),也就是last_pid,因此,task0返回的是task1的任务号,也就是其子进程的任务号!!!
157 if (__res >= 0) \
158 return (type) __res; \
159 errno = -__res; \
160 return -1; \
161 }
*************************************************************************************
如果接下来一直使用s进行单步调试(不会有时间中断),果然,eax=0x1,程序没有去运行init(),而去运行了pause(int 0x80),那么说明到现在为止,task1从来没有被调度到,这时候,我在timer_interrupt的地方加上break,cont之后,就应该是进行task1的调度了,马上实验下!!!
注意:我后来才发现,以上方式只是有一定概率能成功,如果task0提前运行了schedule(),那么进入中断的时候是从task1切换成其他任务。
task0回到用户态后,运行到timer_interrupt时的堆栈(task0内核态堆栈):
Stack address size 4
| STACK 0x000210c4 [0x00001064] //task0 eax
| STACK 0x000210c8 [0x00019027] //task0 ebx
| STACK 0x000210cc [0x00000087] //task0 ecx
| STACK 0x000210d0 [0x00ffe064] //task0 edx
| STACK 0x000210d4 [0xffffffff] //-1
| STACK 0x000210d8 [0x00000017] //task0 fs
| STACK 0x000210dc [0x00000010] //task0 es
| STACK 0x000210e0 [0x00000010] //task0 ds
| STACK 0x000210e4 [0x0000c4a0] //task0 eip
| STACK 0x000210e8 [0x00000008] //task0 cs
| STACK 0x000210ec [0x00000602] //以下应该是pause()对task0系统栈的使用数据
| STACK 0x000210f0 [0x00fff000]
| STACK 0x000210f4 [0x04000000]
| STACK 0x000210f8 [0x00000ffc]
| STACK 0x000210fc [0x00000000]
| STACK 0x00021100 [0x00ffe064]
| STACK 0x00021104 [0x00000040]
| STACK 0x00021108 [0x00000000]
| STACK 0x0002110c [0x00001064]
| STACK 0x00021110 [0x00000000]
| STACK 0x00021114 [0x00021144]
| STACK 0x00021118 [0x000098b0]
| STACK 0x0002111c [0x00000000]
| STACK 0x00021120 [0x04000000]
| STACK 0x00021124 [0x000a0000]
| STACK 0x00021128 [0x00000000]
| STACK 0x0002112c [0x000e0000]
| STACK 0x00021130 [0x00000ffc]
| STACK 0x00021134 [0x000a0000]
| STACK 0x00021138 [0x00000000]
| STACK 0x0002113c [0x00000000]
| STACK 0x00021140 [0x00000001]
| STACK 0x00021144 [0x000375dc]
| STACK 0x00021148 [0x000079d0]
| STACK 0x0002114c [0x00000001]
| STACK 0x00021150 [0x000375dc]
| STACK 0x00021154 [0x00000ffc]
| STACK 0x00021158 [0x000e0000]
| STACK 0x0002115c [0x00000017]
| STACK 0x00021160 [0x000078ab]
| STACK 0x00021164 [0x00000000]
| STACK 0x00021168 [0x00055800]
| STACK 0x0002116c [0x00000021]
| STACK 0x00021170 [0x00000002]
| STACK 0x00021174 [0x00000017]
| STACK 0x00021178 [0x00000017]
| STACK 0x0002117c [0x00000017]
| STACK 0x00021180 [0x0000683a]
| STACK 0x00021184 [0x0000000f]
| STACK 0x00021188 [0x00000216]
| STACK 0x0002118c [0x000375d4]
| STACK 0x00021190 [0x00000017]
| STACK 0x00021194 [0x00000002] //栈顶(数据无效)
代码:
188 .align 2
189 _timer_interrupt:
190 push %ds # save ds,es and put kernel data space
191 push %es # into them. %fs is used by _system_call
192 push %fs
193 pushl $-1 # fill in -1 for orig_eax
194 pushl %edx # we save %eax,%ecx,%edx as gcc doesn't
195 pushl %ecx # save those across function calls. %ebx
196 pushl %ebx # is saved as we use that in ret_sys_call
197 pushl %eax
198 movl $0x10,%eax
199 mov %ax,%ds
200 mov %ax,%es
201 movl $0x17,%eax
202 mov %ax,%fs
203 incl _jiffies
204 movb $0x20,%al # EOI to interrupt controller #1
205 outb %al,$0x20
206 movl CS(%esp),%eax #也就是堆栈中cs=0x8
207 andl $3,%eax # %eax is CPL (0 or 3, 0=supervisor)
208 pushl %eax #eax=0x0,CPL=0
209 call _do_timer # 'do_timer(long CPL)' does everything from
210 addl $4,%esp # task switching to accounting ...
211 jmp ret_from_sys_call
call _do_timer之前的栈
| STACK 0x000210c0 [0x00000000] //eax,CPL=0
| STACK 0x000210c4 [0x00001064] //task0 eax
| STACK 0x000210c8 [0x00019027] //task0 ebx
| STACK 0x000210cc [0x00000087] //task0 ecx
| STACK 0x000210d0 [0x00ffe064] //task0 edx
| STACK 0x000210d4 [0xffffffff] //-1
| STACK 0x000210d8 [0x00000017] //task0 fs
| STACK 0x000210dc [0x00000010] //task0 es
| STACK 0x000210e0 [0x00000010] //task0 ds
| STACK 0x000210e4 [0x0000c4a0] //task0 eip
| STACK 0x000210e8 [0x00000008] //task0 cs
调用do_timer,也会有call _do_timer后一句eip入栈,在do_timer中有这么一句:
364 if ((--current->counter)>0) return;
365 current->counter=0;
366 if (!cpl) return;
367 schedule();
此时的CPL=0,还是不能调用schedule(),return后jmp ret_from_sys_call,堆栈保持不变
这点一开始让我很奇怪,这样岂不是永远进不了task1了吗(之前我以为pause()是一个循环体)?然后看一下pause()程序:
164 int sys_pause(void)
165 {
166 current->state = TASK_INTERRUPTIBLE;
167 schedule();
168 return 0;
169 }
原来sys_pause()中有schedule()函数的调用,因此,实际的task0调度到task1应该是在sys_pause()中,也就是说,task0运行sys_pause,然后调用schedule()对task1进行调度,而自身处于TASK_INTERRUPTIBLE状态(不参与调度)。之后task1运行在用户态,可以fork()出task2...taskn,这时,task1...taskn之间是通过时钟中断中的schedule()进行调度,因为他们在中断前的CPL=3。
当调度到task1后,task1的eip是task0 fork()时int 0x80处压入的eip(放在task1的tss中),也就是int 0x80的后一句指令:
return (type) __res;
而调度到task1时,所有的寄存器加载task1的tss,而p->tss.eax=0,因此,task1这时候退出fork(),并且返回0,这也就是父程序返回子程序的pid,而子程序返回0的原因!