Process Management


Roy Lee, 20 June 2005

NCTU Computer Operating System Lab

Process State

[Figure: the process life cycle: a newly created task becomes runnable and runs; a running task may be interrupted and later resume, may expire its timeslice, or may terminate.]

Robert Love, “Linux Kernel Development,” 2nd Edition

Process Creation – fork()

[Figure: parent and child address spaces across fork() and exec(). fork(): copy the whole address space and the page table, so the child starts with identical pages (A, B, C, D). exec(): discard the current address space and load another program (new pages W, X, Y, Z).]
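To make the figure concrete, here is a minimal user-space sketch (not from the slides; /bin/ls is just an arbitrary program to load):

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();	/* duplicate the caller's address space */
	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		/* Child: discard the copied address space, load /bin/ls. */
		execl("/bin/ls", "ls", "-l", (char *)NULL);
		perror("execl");	/* reached only if exec fails */
		_exit(127);
	}
	waitpid(pid, NULL, 0);	/* parent waits for the child */
	return 0;
}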

Process Creation – vfork()

[Figure: with vfork() the child does not get a copy of the address space and page table; it borrows the parent's pages (A, B, C, D) while the parent is suspended, and exec() then discards the current address space and loads another program (W, X, Y, Z) into a fresh one.]

Process Creation – Copy-on-Write

[Figure: fork() with copy-on-write: only copy the page table, so parent and child share the data pages (A, B, C, D); copying of data is delayed or altogether prevented. Only when one side writes to a page (B) is a private copy (B') made.]

Process Creation – Copy-on-Write (continued)

[Figure: fork() only copies the page table, delaying or altogether preventing the copying of data; when the child then calls exec(), it discards the shared address space and loads another program (W, X, Y, Z), so the original data pages are never copied at all.]
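The effect is easy to observe from user space. A small sketch (not from the slides): fork() returns quickly even with a large heap, and a write by the child leaves the parent's view untouched because the child receives its own copy of just the written page:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	/* 64 MiB of data: fork() copies page tables, not these pages. */
	size_t len = 64u << 20;
	char *buf = malloc(len);
	if (!buf)
		return 1;
	memset(buf, 'A', len);

	pid_t pid = fork();
	if (pid == 0) {
		buf[0] = 'B';	/* write fault: one page is copied (B -> B') */
		printf("child sees:  %c\n", buf[0]);	/* B */
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent sees: %c\n", buf[0]);	/* still A */
	free(buf);
	return 0;
}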


task_struct

[include/linux/sched.h]

Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition

Robert Love, “Linux Kernel Development,” 2nd Edition
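For orientation, a heavily abridged sketch of the 2.6-era structure (the field selection here is ours; see the header above for the full, several-hundred-line definition):

struct task_struct {
	volatile long state;		/* -1 unrunnable, 0 runnable, >0 stopped */
	struct thread_info *thread_info;
	...
	struct mm_struct *mm;		/* address space; NULL for kernel threads */
	pid_t pid;
	pid_t tgid;			/* thread group id, see the PID vs. TGID slide */
	struct task_struct *parent;
	struct list_head children;
	...
	char comm[16];			/* executable name */
	...
};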

Process Creation – Threads

Threads in Linux

To Linux, threads are just processes that happen to share certain resources.

clone() – the heart of the Linux implementation of threads.

Threads are created like normal tasks, except that the clone() syscall is passed flags indicating which specific resources are to be shared:

clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, 0);

Actually, both fork() and vfork() are also implemented via the clone() syscall:

clone(SIGCHLD, 0);                          /* fork()  */
clone(CLONE_VFORK | CLONE_VM | SIGCHLD, 0); /* vfork() */
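The same flags are usable from user space through the glibc clone() wrapper. A minimal sketch (not from the slides): because of CLONE_VM, the child's write to a global is visible to the parent:

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int shared;			/* visible to the child via CLONE_VM */

static int child_fn(void *arg)
{
	shared = 42;			/* writes the parent's memory directly */
	return 0;
}

int main(void)
{
	const size_t stack_size = 64 * 1024;
	char *stack = malloc(stack_size);
	if (!stack)
		return 1;

	/* The stack grows down on x86, so pass the top of the block. */
	int pid = clone(child_fn, stack + stack_size,
			CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD,
			NULL);
	if (pid < 0)
		return 1;

	waitpid(pid, NULL, 0);
	printf("shared = %d\n", shared);	/* 42: memory was shared */
	free(stack);
	return 0;
}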

Process Creation Flow

User space: fork(), vfork(), and clone() enter the kernel as sys_fork(), sys_vfork(), and sys_clone() [kernel/process.c], all of which call do_fork() [kernel/fork.c]:

1. alloc_pidmap() allocates a pid for the child.
2. copy_process() duplicates the task_struct, initializes it, and sets it up according to the specified clone_flags.
3. Success? If not, free_pidmap() releases the pid.
4. Otherwise, wake_up_new_task() [kernel/sched.c] puts the child into the runqueue.
5. vfork only: the parent sleeps in wait_for_completion(); when the child terminates, it wakes up the parent sleeping in the wait queue.
6. do_fork() returns the pid to user space.

Process State

[Figure: the classic process state diagram: states initial, ready, running, asleep, zombie; transitions: fork (initial → ready), schedule (ready → running), preempted (running → ready), exit (running → zombie), and sleep/wakeup between running, asleep, and ready.]

Process State

[Figure: the same diagram with running split into user running and kernel running: a syscall or exception takes user running → kernel running, return goes back to user running; interrupts are handled in kernel mode; schedule and preempted now involve kernel running; fork (initial → ready) and exit (→ zombie) as before; the initial, ready, asleep, and zombie states are unchanged.]

Execution Mode and Context

                Process context             System context
Kernel mode     system calls, exceptions    interrupts, system tasks
User mode       application (user) code     not allowed

Uresh Vahalia, "UNIX Internals: The New Frontiers"

Execution Mode and Context (continued)

[Figure: two processes P0 and P1, each with a user-space portion and a kernel-space portion, mapped onto the same matrix: kernel mode runs in process context (system calls, exceptions) or system context (interrupts, system tasks); user mode in process context runs application (user) code, and user mode in system context is not allowed.]

Uresh Vahalia, "UNIX Internals: The New Frontiers"

thread_info

struct thread_info {
	struct task_struct	*task;
	struct exec_domain	*exec_domain;
	__u32			flags;
	__u32			status;
	__u32			cpu;
	int			preempt_count;
	mm_segment_t		addr_limit;
	struct restart_block	restart_block;
};

Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition
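Why such a small, separate structure? On 2.6 x86, thread_info sits at the bottom of the task's THREAD_SIZE-aligned kernel stack, so the kernel can recover it (and, from it, current) by simply masking the stack pointer. Roughly (a sketch of the idea, cf. include/asm-i386/thread_info.h):

static inline struct thread_info *current_thread_info(void)
{
	struct thread_info *ti;

	/* Round %esp down to the base of the kernel stack. */
	__asm__("andl %%esp,%0" : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
	return ti;
}

#define current (current_thread_info()->task)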

do_fork() [kernel/fork.c]

long pid = alloc_pidmap();
if (pid < 0)
	return -EAGAIN;
...
p = copy_process(clone_flags, stack_start, regs, stack_size,
		 parent_tidptr, child_tidptr, pid);
if (!IS_ERR(p)) {
	struct completion vfork;

	if (clone_flags & CLONE_VFORK) {
		p->vfork_done = &vfork;
		init_completion(&vfork);
	}
	...
	if (!(clone_flags & CLONE_STOPPED))
		wake_up_new_task(p, clone_flags);	/* put the child into the runqueue */
	else
		p->state = TASK_STOPPED;
	...
	if (clone_flags & CLONE_VFORK) {
		wait_for_completion(&vfork);	/* sleep until the child wakes us */
		if (unlikely(current->ptrace & PT_TRACE_VFORK_DONE))
			ptrace_notify((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
	}
} else {
	free_pidmap(pid);	/* copy_process() failed: give the pid back */
	pid = PTR_ERR(p);
}
return pid;
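The CLONE_VFORK completion is directly observable from user space: the parent does not run again until the child execs or exits. A small sketch (not from the slides; note that after vfork() the child may safely do little more than exec or _exit):

#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = vfork();
	if (pid == 0) {
		/* The parent is blocked here, sleeping on vfork_done. */
		write(STDOUT_FILENO, "child runs first\n", 17);
		_exit(0);	/* completes vfork_done: the parent resumes */
	}
	printf("parent resumes after the child\n");
	waitpid(pid, NULL, 0);
	return 0;
}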


copy_process() [kernel/fork.c]

int retval;
struct task_struct *p = NULL;

if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
	return ERR_PTR(-EINVAL);

/*
 * Thread groups must share signals as well, and detached threads
 * can only be started up within the thread group.
 */
if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
	return ERR_PTR(-EINVAL);

/*
 * Shared signal handlers imply shared VM. By way of the above,
 * thread groups also imply shared VM. Blocking this case allows
 * for various simplifications in other code.
 */
if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
	return ERR_PTR(-EINVAL);
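These sanity checks are visible from user space. A tiny probe (a hypothetical test program, not from the slides) asks for CLONE_SIGHAND without CLONE_VM and gets EINVAL back:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int fn(void *arg)
{
	return 0;
}

int main(void)
{
	char *stack = malloc(64 * 1024);
	if (!stack)
		return 1;

	/* Violates the CLONE_SIGHAND-implies-CLONE_VM check above. */
	int pid = clone(fn, stack + 64 * 1024, CLONE_SIGHAND | SIGCHLD, NULL);
	if (pid < 0)
		printf("clone: %s\n", strerror(errno));	/* Invalid argument */
	free(stack);
	return 0;
}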

copy_process() (continued)

retval = security_task_create(clone_flags);
if (retval)
	goto fork_out;

retval = -ENOMEM;
p = dup_task_struct(current);	/* duplicate task_struct and thread_info */
if (!p)
	goto fork_out;

/* enforce the per-user process limit (RLIMIT_NPROC) */
retval = -EAGAIN;
if (atomic_read(&p->user->processes) >=
		p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
			p->user != &root_user)
		goto bad_fork_free;
}

atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
get_group_info(p->group_info);

dup_task_struct()

static struct task_struct *dup_task_struct(struct task_struct *orig)
{
	struct task_struct *tsk;
	struct thread_info *ti;

	prepare_to_copy(orig);	/* unlazy_fpu(): flush lazy FPU state first */

	tsk = alloc_task_struct();
	if (!tsk)
		return NULL;

	/* alloc_thread_info(): __get_free_pages(GFP_KERNEL, THREAD_ORDER) */
	ti = alloc_thread_info(tsk);
	if (!ti) {
		free_task_struct(tsk);
		return NULL;
	}

	*ti = *orig->thread_info;	/* copy the parent's descriptors, ... */
	*tsk = *orig;
	tsk->thread_info = ti;		/* ... then fix up the cross links */
	ti->task = tsk;

	atomic_set(&tsk->usage, 2);	/* reference count starts at 2 */
	return tsk;
}

where unlazy_fpu() is:

#define unlazy_fpu(tsk) do { \
	if ((tsk)->thread_info->status & TS_USEDFPU) \
		save_init_fpu(tsk); \
} while (0)

Daniel P. Bovet, Marco Cesati, “Understanding the Linux Kernel,” 3rd Edition

copy_process() (continued)

if (nr_threads >= max_threads)
	goto bad_fork_cleanup_count;

if (!try_module_get(p->thread_info->exec_domain->module))
	goto bad_fork_cleanup_count;

if (p->binfmt && !try_module_get(p->binfmt->module))
	goto bad_fork_cleanup_put_domain;

p->did_exec = 0;
copy_flags(clone_flags, p);
p->pid = pid;

retval = -EFAULT;
if (clone_flags & CLONE_PARENT_SETTID)
	if (put_user(p->pid, parent_tidptr))
		goto bad_fork_cleanup;
...
p->tgid = p->pid;
if (clone_flags & CLONE_THREAD)
	p->tgid = current->tgid;	/* a new thread joins the caller's group */

PID vs. TGID

[Figure: pid 1002/tgid 1002 fork()s pid 1003/tgid 1003 and pid 1004/tgid 1004; pid 1003 clone()s pid 1005/tgid 1003 and pid 1006/tgid 1003; pid 1004 clone()s pid 1007/tgid 1004.]

1. Every process has a unique pid.
2. Every process in the same thread group has the same tgid.
3. The tgid is the pid of the oldest process (the thread group leader) in that group.

This is why getpid() reports the same value for all threads of a program:

asmlinkage long sys_getpid(void)
{
	return current->tgid;
}

do_fork() {
	...
	copy_process() {
		...
		p->pid = pid;
		...
		p->tgid = p->pid;
		if (clone_flags & CLONE_THREAD)
			p->tgid = current->tgid;
	}
}
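A quick user-space check (a sketch, not from the slides; gettid() had no glibc wrapper in this era, so it is invoked via syscall()):

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static void *worker(void *arg)
{
	/* Same getpid() (the tgid), different gettid() (the real pid). */
	printf("thread: getpid()=%d gettid()=%ld\n",
	       getpid(), (long)syscall(SYS_gettid));
	return NULL;
}

int main(void)
{
	pthread_t t;

	printf("main:   getpid()=%d gettid()=%ld\n",
	       getpid(), (long)syscall(SYS_gettid));
	pthread_create(&t, NULL, worker, NULL);
	pthread_join(t, NULL);
	return 0;
}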

copy_process() (continued)

if ((retval = security_task_alloc(p)))
	goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
	goto bad_fork_cleanup_security;
/* copy all the process information */
if ((retval = copy_semundo(clone_flags, p)))
	goto bad_fork_cleanup_audit;
if ((retval = copy_files(clone_flags, p)))
	goto bad_fork_cleanup_semundo;
if ((retval = copy_fs(clone_flags, p)))
	goto bad_fork_cleanup_files;
if ((retval = copy_sighand(clone_flags, p)))
	goto bad_fork_cleanup_fs;
if ((retval = copy_signal(clone_flags, p)))
	goto bad_fork_cleanup_sighand;
if ((retval = copy_mm(clone_flags, p)))
	goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
	goto bad_fork_cleanup_mm;
if ((retval = copy_namespace(clone_flags, p)))
	goto bad_fork_cleanup_keys;

The error labels undo the copies in exactly the reverse order, so a failure at any step unwinds precisely what has been set up so far:

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_keys:
	exit_keys(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p);	/* blocking */
bad_fork_cleanup_files:
	exit_files(p);	/* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
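This goto chain is the kernel's standard error-unwind idiom. A self-contained toy version (all names hypothetical) shows the shape: one label per acquired resource, released in reverse order of acquisition:

#include <stdio.h>

struct ctx { int a, b; };

/* Stand-ins for copy_files(), copy_fs(), ...: the third one fails. */
static int acquire_a(struct ctx *c) { c->a = 1; return 0; }
static int acquire_b(struct ctx *c) { c->b = 1; return 0; }
static int acquire_c(struct ctx *c) { (void)c; return -1; }
static void release_b(struct ctx *c) { c->b = 0; puts("undo b"); }
static void release_a(struct ctx *c) { c->a = 0; puts("undo a"); }

static int setup(struct ctx *c)
{
	int err;

	if ((err = acquire_a(c)))
		goto out;
	if ((err = acquire_b(c)))
		goto undo_a;
	if ((err = acquire_c(c)))
		goto undo_b;
	return 0;	/* everything acquired */

undo_b:
	release_b(c);
undo_a:
	release_a(c);
out:
	return err;
}

int main(void)
{
	struct ctx c = { 0, 0 };

	/* Prints "undo b", "undo a", then "setup() = -1". */
	printf("setup() = %d\n", setup(&c));
	return 0;
}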


context_switch() [kernel/sched.c]

static inline
task_t *context_switch(runqueue_t *rq, task_t *prev, task_t *next)
{
	struct mm_struct *mm = next->mm;
	struct mm_struct *oldmm = prev->active_mm;

	if (unlikely(!mm)) {
		/* next is a kernel thread: borrow prev's address space */
		next->active_mm = oldmm;
		atomic_inc(&oldmm->mm_count);
		enter_lazy_tlb(oldmm, next);
	} else
		switch_mm(oldmm, mm, next);

	if (unlikely(!prev->mm)) {
		/* prev was a kernel thread: drop the borrowed mm */
		prev->active_mm = NULL;
		WARN_ON(rq->prev_mm);
		rq->prev_mm = oldmm;
	}

	/* Here we just switch the register state and the stack. */
	switch_to(prev, next, prev);

	return prev;
}
