/*
 *  linux/fs/exec.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * #!-checking implemented by tytso.
 */
/*
 * Demand-loading implemented 01.12.91 - no need to read anything but
 * the header into memory. The inode of the executable is put into
 * "current->executable", and page faults do the actual loading. Clean.
 *
 * Once more I can proudly say that linux stood up to being changed: it
 * was less than 2 hours work to get demand-loading completely implemented.
 *
 * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
 * current->executable is only used by the procfs.  This allows a dispatch
 * table to check for several different types of binary formats.  We keep
 * trying until we recognize the file or we run out of supported binary
 * formats.
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/swap.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/proc_fs.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/rmap.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>

#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif

int core_uses_pid;
char core_pattern[CORENAME_MAX_SIZE] = "core";
int suid_dumpable = 0;

/* The maximal length of core_pattern is also specified in sysctl.c */

static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

int register_binfmt(struct linux_binfmt * fmt)
{
        if (!fmt)
                return -EINVAL;
        write_lock(&binfmt_lock);
        list_add(&fmt->lh, &formats);
        write_unlock(&binfmt_lock);
        return 0;
}

EXPORT_SYMBOL(register_binfmt);

void unregister_binfmt(struct linux_binfmt * fmt)
{
        write_lock(&binfmt_lock);
        list_del(&fmt->lh);
        write_unlock(&binfmt_lock);
}

EXPORT_SYMBOL(unregister_binfmt);
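
/*
 * Illustrative only: a minimal sketch of how a loadable module might use
 * the register_binfmt()/unregister_binfmt() API above. The "dummy" handler
 * and its unconditional -ENOEXEC are hypothetical and not part of this
 * file; the struct linux_binfmt fields used (.module, .load_binary) are
 * the ones this file manipulates.
 */
#if 0
static int dummy_load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        /* Decline every image so the next handler on "formats" is tried. */
        return -ENOEXEC;
}

static struct linux_binfmt dummy_format = {
        .module      = THIS_MODULE,
        .load_binary = dummy_load_binary,
};

static int __init dummy_binfmt_init(void)
{
        return register_binfmt(&dummy_format);  /* adds to the "formats" list */
}

static void __exit dummy_binfmt_exit(void)
{
        unregister_binfmt(&dummy_format);       /* removes it again */
}

module_init(dummy_binfmt_init);
module_exit(dummy_binfmt_exit);
#endif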

static inline void put_binfmt(struct linux_binfmt * fmt)
{
        module_put(fmt->module);
}

/*
 * Note that a shared library must be both readable and executable
 * for security reasons.
 *
 * Also note that we take the address to load from the file itself.
 */
asmlinkage long sys_uselib(const char __user * library)
{
        struct file * file;
        struct nameidata nd;
        int error;

        error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
        if (error)
                goto out;

        error = -EINVAL;
        if (!S_ISREG(nd.dentry->d_inode->i_mode))
                goto exit;

        error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
        if (error)
                goto exit;

        file = nameidata_to_filp(&nd, O_RDONLY);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;

        error = -ENOEXEC;
        if (file->f_op) {
                struct linux_binfmt * fmt;

                read_lock(&binfmt_lock);
                list_for_each_entry(fmt, &formats, lh) {
                        if (!fmt->load_shlib)
                                continue;
                        if (!try_module_get(fmt->module))
                                continue;
                        read_unlock(&binfmt_lock);
                        error = fmt->load_shlib(file);
                        read_lock(&binfmt_lock);
                        put_binfmt(fmt);
                        if (error != -ENOEXEC)
                                break;
                }
                read_unlock(&binfmt_lock);
        }
        fput(file);
out:
        return error;
exit:
        release_open_intent(&nd);
        path_release(&nd);
        goto out;
}
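
/*
 * Illustrative only: sys_uselib() is reached from userspace via the
 * uselib(2) syscall, which libc does not always wrap, so this hypothetical
 * standalone sketch invokes it through syscall(2) on architectures that
 * define SYS_uselib. Expect ENOEXEC unless a registered binfmt with a
 * load_shlib handler accepts the file.
 */
#if 0
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(int argc, char **argv)
{
        const char *lib = argc > 1 ? argv[1] : "/lib/libdemo.a.out"; /* hypothetical path */

        if (syscall(SYS_uselib, lib) < 0)
                fprintf(stderr, "uselib(%s): %s\n", lib, strerror(errno));
        return 0;
}
#endif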

#ifdef CONFIG_MMU

static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                int write)
{
        struct page *page;
        int ret;

#ifdef CONFIG_STACK_GROWSUP
        if (write) {
                ret = expand_stack_downwards(bprm->vma, pos);
                if (ret < 0)
                        return NULL;
        }
#endif
        ret = get_user_pages(current, bprm->mm, pos,
                        1, write, 1, &page, NULL);
        if (ret <= 0)
                return NULL;

        if (write) {
                struct rlimit *rlim = current->signal->rlim;
                unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;

                /*
                 * Limit to 1/4-th the stack size for the argv+env strings.
                 * This ensures that:
                 *  - the remaining binfmt code will not run out of stack space,
                 *  - the program will have a reasonable amount of stack left
                 *    to work from.
                 */
                if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
                        put_page(page);
                        return NULL;
                }
        }

        return page;
}
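
/*
 * Illustrative only: the 1/4-of-RLIMIT_STACK cap above is what bounds the
 * total size of the argv+env strings on MMU kernels. A hypothetical
 * userspace view of the same budget, using getrlimit(2):
 */
#if 0
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rlim;

        if (getrlimit(RLIMIT_STACK, &rlim) == 0)
                printf("argv+env budget at exec time: about %lu bytes\n",
                       (unsigned long)rlim.rlim_cur / 4);
        return 0;
}
#endif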

static void put_arg_page(struct page *page)
{
        put_page(page);
}

static void free_arg_page(struct linux_binprm *bprm, int i)
{
}

static void free_arg_pages(struct linux_binprm *bprm)
{
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
                struct page *page)
{
        flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}

static int __bprm_mm_init(struct linux_binprm *bprm)
{
        int err = -ENOMEM;
        struct vm_area_struct *vma = NULL;
        struct mm_struct *mm = bprm->mm;

        bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (!vma)
                goto err;

        down_write(&mm->mmap_sem);
        vma->vm_mm = mm;

        /*
         * Place the stack at the largest stack address the architecture
         * supports. Later, we'll move this to an appropriate place. We don't
         * use STACK_TOP because that can depend on attributes which aren't
         * configured yet.
         */
        vma->vm_end = STACK_TOP_MAX;
        vma->vm_start = vma->vm_end - PAGE_SIZE;

        vma->vm_flags = VM_STACK_FLAGS;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        err = insert_vm_struct(mm, vma);
        if (err) {
                up_write(&mm->mmap_sem);
                goto err;
        }

        mm->stack_vm = mm->total_vm = 1;
        up_write(&mm->mmap_sem);

        bprm->p = vma->vm_end - sizeof(void *);

        return 0;

err:
        if (vma) {
                bprm->vma = NULL;
                kmem_cache_free(vm_area_cachep, vma);
        }

        return err;
}

static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
        return len <= MAX_ARG_STRLEN;
}

#else

static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                int write)
{
        struct page *page;

        page = bprm->page[pos / PAGE_SIZE];
        if (!page && write) {
                page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
                if (!page)
                        return NULL;
                bprm->page[pos / PAGE_SIZE] = page;
        }

        return page;
}

static void put_arg_page(struct page *page)
{
}

static void free_arg_page(struct linux_binprm *bprm, int i)
{
        if (bprm->page[i]) {
                __free_page(bprm->page[i]);
                bprm->page[i] = NULL;
        }
}

static void free_arg_pages(struct linux_binprm *bprm)
{
        int i;

        for (i = 0; i < MAX_ARG_PAGES; i++)
                free_arg_page(bprm, i);
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
                struct page *page)
{
}

static int __bprm_mm_init(struct linux_binprm *bprm)
{
        bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
        return 0;
}

static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
        return len <= bprm->p;
}

#endif /* CONFIG_MMU */

/*
 * Create a new mm_struct and populate it with a temporary stack
 * vm_area_struct.  We don't have enough context at this point to set the stack
 * flags, permissions, and offset, so we use temporary values.  We'll update
 * them later in setup_arg_pages().
 */
int bprm_mm_init(struct linux_binprm *bprm)
{
        int err;
        struct mm_struct *mm = NULL;

        bprm->mm = mm = mm_alloc();
        err = -ENOMEM;
        if (!mm)
                goto err;

        err = init_new_context(current, mm);
        if (err)
                goto err;

        err = __bprm_mm_init(bprm);
        if (err)
                goto err;

        return 0;

err:
        if (mm) {
                bprm->mm = NULL;
                mmdrop(mm);
        }

        return err;
}

/*
 * count() counts the number of strings in the array ARGV.
 */
static int count(char __user * __user * argv, int max)
{
        int i = 0;

        if (argv != NULL) {
                for (;;) {
                        char __user * p;

                        if (get_user(p, argv))
                                return -EFAULT;
                        if (!p)
                                break;
                        argv++;
                        if (++i > max)
                                return -E2BIG;
                        cond_resched();
                }
        }
        return i;
}
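
/*
 * Illustrative only: a hypothetical userspace analogue of count(), walking
 * a NULL-terminated vector the way exec sees argv/envp. MAX_STRINGS is an
 * arbitrary demo cap standing in for the kernel's MAX_ARG_STRINGS.
 */
#if 0
#include <stdio.h>

#define MAX_STRINGS 16

static int count_strings(char **vec, int max)
{
        int i = 0;

        if (vec) {
                while (vec[i]) {
                        if (++i > max)
                                return -1;      /* the kernel returns -E2BIG */
                }
        }
        return i;
}

int main(void)
{
        char *demo[] = { "ls", "-l", "/tmp", NULL };

        printf("argc = %d\n", count_strings(demo, MAX_STRINGS)); /* prints 3 */
        return 0;
}
#endif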

/*
 * 'copy_strings()' copies argument/environment strings from the old
 * process's memory to the new process's stack.  The call to get_user_pages()
 * ensures the destination page is created and not swapped out.
 */
static int copy_strings(int argc, char __user * __user * argv,
                        struct linux_binprm *bprm)
{
        struct page *kmapped_page = NULL;
        char *kaddr = NULL;
        unsigned long kpos = 0;
        int ret;

        while (argc-- > 0) {
                char __user *str;
                int len;
                unsigned long pos;

                if (get_user(str, argv+argc) ||
                                !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
                        ret = -EFAULT;
                        goto out;
                }

                if (!valid_arg_len(bprm, len)) {
                        ret = -E2BIG;
                        goto out;
                }

                /* We're going to work our way backwards. */
                pos = bprm->p;
                str += len;
                bprm->p -= len;

                while (len > 0) {
                        int offset, bytes_to_copy;

                        offset = pos % PAGE_SIZE;
                        if (offset == 0)
                                offset = PAGE_SIZE;

                        bytes_to_copy = offset;
                        if (bytes_to_copy > len)
                                bytes_to_copy = len;

                        offset -= bytes_to_copy;
                        pos -= bytes_to_copy;
                        str -= bytes_to_copy;
                        len -= bytes_to_copy;

                        if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
                                struct page *page;

                                page = get_arg_page(bprm, pos, 1);
                                if (!page) {
                                        ret = -E2BIG;
                                        goto out;
                                }

                                if (kmapped_page) {
                                        flush_kernel_dcache_page(kmapped_page);
                                        kunmap(kmapped_page);
                                        put_arg_page(kmapped_page);
                                }
                                kmapped_page = page;
                                kaddr = kmap(kmapped_page);
                                kpos = pos & PAGE_MASK;
                                flush_arg_page(bprm, kpos, kmapped_page);
                        }
                        if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
                                ret = -EFAULT;
                                goto out;
                        }
                }
        }
        ret = 0;
out:
        if (kmapped_page) {
                flush_kernel_dcache_page(kmapped_page);
                kunmap(kmapped_page);
                put_arg_page(kmapped_page);
        }
        return ret;
}
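
/*
 * Illustrative only: copy_strings() packs the strings downward from the
 * top of the new stack, last argument first, so argv[0] ends up lowest in
 * memory. A hypothetical userspace model of that layout, with a plain
 * buffer standing in for the stack pages:
 */
#if 0
#include <stdio.h>
#include <string.h>

#define STACK_SZ 64     /* demo "stack" size */

int main(void)
{
        char stack[STACK_SZ];
        char *argv[] = { "ls", "-l", "/tmp" };
        size_t p = STACK_SZ;    /* mirrors bprm->p */
        int i;

        for (i = 2; i >= 0; i--) {      /* last string first */
                size_t len = strlen(argv[i]) + 1;       /* include the NUL */
                p -= len;
                memcpy(stack + p, argv[i], len);
        }
        /* The strings now sit back-to-back: "ls\0-l\0/tmp\0" at the top. */
        for (i = 0; i < 3; i++) {
                printf("%s\n", stack + p);
                p += strlen(stack + p) + 1;
        }
        return 0;
}
#endif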

/*
 * Like copy_strings, but get argv and its values from kernel memory.
 */
int copy_strings_kernel(int argc, char **argv, struct linux_binprm *bprm)
{
        int r;
        mm_segment_t oldfs = get_fs();
        set_fs(KERNEL_DS);
        r = copy_strings(argc, (char __user * __user *)argv, bprm);
        set_fs(oldfs);
        return r;
}
EXPORT_SYMBOL(copy_strings_kernel);

#ifdef CONFIG_MMU

/*
 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
 * the binfmt code determines where the new stack should reside, we shift it to
 * its final location.  The process proceeds as follows:
 *
 * 1) Use shift to calculate the new vma endpoints.
 * 2) Extend vma to cover both the old and new ranges.  This ensures the
 *    arguments passed to subsequent functions are consistent.
 * 3) Move vma's page tables to the new range.
 * 4) Free up any cleared pgd range.
 * 5) Shrink the vma to cover only the new range.
 */
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long old_start = vma->vm_start;
        unsigned long old_end = vma->vm_end;
        unsigned long length = old_end - old_start;
        unsigned long new_start = old_start - shift;
        unsigned long new_end = old_end - shift;
        struct mmu_gather *tlb;

        BUG_ON(new_start > new_end);

        /*
         * ensure there are no vmas between where we want to go
         * and where we are
         */
        if (vma != find_vma(mm, new_start))
                return -EFAULT;

        /*
         * cover the whole range: [new_start, old_end)
         */
        vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);

        /*
         * move the page tables downwards, on failure we rely on
         * process cleanup to remove whatever mess we made.
         */
        if (length != move_page_tables(vma, old_start,
                                       vma, new_start, length))
                return -ENOMEM;

        lru_add_drain();
        tlb = tlb_gather_mmu(mm, 0);
        if (new_end > old_start) {
                /*
                 * when the old and new regions overlap clear from new_end.
                 */
                free_pgd_range(&tlb, new_end, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        } else {
                /*
                 * otherwise, clean from old_start; this is done to not touch
                 * the address space in [new_end, old_start), because some
                 * architectures (IA-64) have constraints on va-space that
                 * make this illegal; for the others it's just a little
                 * faster.
                 */
                free_pgd_range(&tlb, old_start, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        }
        tlb_finish_mmu(tlb, new_end, old_end);

        /*
         * shrink the vma to just the new range.
         */
        vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);

        return 0;
}

#define EXTRA_STACK_VM_PAGES    20      /* random */

/*
 * Finalizes the stack vm_area_struct. The flags and permissions are updated,
 * the stack is optionally relocated, and some extra space is added.
 */
int setup_arg_pages(struct linux_binprm *bprm,
                    unsigned long stack_top,
                    int executable_stack)
{
        unsigned long ret;
        unsigned long stack_shift;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = bprm->vma;
        struct vm_area_struct *prev = NULL;
        unsigned long vm_flags;
        unsigned long stack_base;

#ifdef CONFIG_STACK_GROWSUP
        /* Limit stack size to 1GB */
        stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
        if (stack_base > (1 << 30))
                stack_base = 1 << 30;

        /* Make sure we didn't let the argument array grow too large. */
        if (vma->vm_end - vma->vm_start > stack_base)
                return -ENOMEM;

        stack_base = PAGE_ALIGN(stack_top - stack_base);

        stack_shift = vma->vm_start - stack_base;
        mm->arg_start = bprm->p - stack_shift;
        bprm->p = vma->vm_end - stack_shift;
#else
        stack_top = arch_align_stack(stack_top);
        stack_top = PAGE_ALIGN(stack_top);
        stack_shift = vma->vm_end - stack_top;

        bprm->p -= stack_shift;
        mm->arg_start = bprm->p;
#endif

        if (bprm->loader)
                bprm->loader -= stack_shift;
        bprm->exec -= stack_shift;

        down_write(&mm->mmap_sem);
        vm_flags = vma->vm_flags;

        /*
         * Adjust stack execute permissions; explicitly enable for
         * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
         * (arch default) otherwise.
         */
        if (unlikely(executable_stack == EXSTACK_ENABLE_X))
                vm_flags |= VM_EXEC;
        else if (executable_stack == EXSTACK_DISABLE_X)
                vm_flags &= ~VM_EXEC;
        vm_flags |= mm->def_flags;

        ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
                        vm_flags);
        if (ret)
                goto out_unlock;
        BUG_ON(prev != vma);

        /* Move stack pages down in memory. */
        if (stack_shift) {
                ret = shift_arg_pages(vma, stack_shift);
                if (ret) {
                        up_write(&mm->mmap_sem);
                        return ret;
                }
        }

#ifdef CONFIG_STACK_GROWSUP
        stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
#else
        stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
#endif
        ret = expand_stack(vma, stack_base);
        if (ret)
                ret = -EFAULT;

out_unlock:
        up_write(&mm->mmap_sem);
        return ret;
}
EXPORT_SYMBOL(setup_arg_pages);

#endif /* CONFIG_MMU */

struct file *open_exec(const char *name)
{
        struct nameidata nd;
        int err;
        struct file *file;

        err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
        file = ERR_PTR(err);

        if (!err) {
                struct inode *inode = nd.dentry->d_inode;
                file = ERR_PTR(-EACCES);
                if (S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
                        file = ERR_PTR(err);
                        if (!err) {
                                file = nameidata_to_filp(&nd, O_RDONLY);
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
                                                fput(file);
                                                file = ERR_PTR(err);
                                        }
                                }
out:
                                return file;
                        }
                }
                release_open_intent(&nd);
                path_release(&nd);
        }
        goto out;
}

EXPORT_SYMBOL(open_exec);

int kernel_read(struct file *file, unsigned long offset,
        char *addr, unsigned long count)
{
        mm_segment_t old_fs;
        loff_t pos = offset;
        int result;

        old_fs = get_fs();
        set_fs(get_ds());
        /* The cast to a user pointer is valid due to the set_fs() */
        result = vfs_read(file, (void __user *)addr, count, &pos);
        set_fs(old_fs);
        return result;
}

EXPORT_SYMBOL(kernel_read);

static int exec_mmap(struct mm_struct *mm)
{
        struct task_struct *tsk;
        struct mm_struct * old_mm, *active_mm;

        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        old_mm = current->mm;
        mm_release(tsk, old_mm);

        if (old_mm) {
                /*
                 * Make sure that if there is a core dump in progress
                 * for the old mm, we get out and die instead of going
                 * through with the exec.  We must hold mmap_sem around
                 * checking core_waiters and changing tsk->mm.  The
                 * core-inducing thread will increment core_waiters for
                 * each thread whose ->mm == old_mm.
                 */
                down_read(&old_mm->mmap_sem);
                if (unlikely(old_mm->core_waiters)) {
                        up_read(&old_mm->mmap_sem);
                        return -EINTR;
                }
        }
        task_lock(tsk);
        active_mm = tsk->active_mm;
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
        task_unlock(tsk);
        arch_pick_mmap_layout(mm);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
                BUG_ON(active_mm != old_mm);
                mmput(old_mm);
                return 0;
        }
        mmdrop(active_mm);
        return 0;
}

/*
 * This function makes sure the current process has its own signal table,
 * so that flush_signal_handlers can later reset the handlers without
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGHAND option to clone().)
 */
static int de_thread(struct task_struct *tsk)
{
        struct signal_struct *sig = tsk->signal;
        struct sighand_struct *oldsighand = tsk->sighand;
        spinlock_t *lock = &oldsighand->siglock;
        struct task_struct *leader = NULL;
        int count;

        if (thread_group_empty(tsk))
                goto no_thread_group;

        /*
         * Kill all other threads in the thread group.
         * We must hold tasklist_lock to call zap_other_threads.
         */
        read_lock(&tasklist_lock);
        spin_lock_irq(lock);
        if (sig->flags & SIGNAL_GROUP_EXIT) {
                /*
                 * Another group action in progress, just
                 * return so that the signal is processed.
                 */
                spin_unlock_irq(lock);
                read_unlock(&tasklist_lock);
                return -EAGAIN;
        }

        /*
         * child_reaper ignores SIGKILL, change it now.
         * Reparenting needs write_lock on tasklist_lock,
         * so it is safe to do it under read_lock.
         */
        if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
                task_active_pid_ns(tsk)->child_reaper = tsk;

        zap_other_threads(tsk);
        read_unlock(&tasklist_lock);

        /*
         * Account for the thread group leader hanging around:
         */
        count = 1;
        if (!thread_group_leader(tsk)) {
                count = 2;
                /*
                 * The SIGALRM timer survives the exec, but needs to point
                 * at us as the new group leader now.  We have a race with
                 * a timer firing now getting the old leader, so we need to
                 * synchronize with any firing (by calling del_timer_sync)
                 * before we can safely let the old group leader die.
                 */
                sig->tsk = tsk;
                spin_unlock_irq(lock);
                if (hrtimer_cancel(&sig->real_timer))
                        hrtimer_restart(&sig->real_timer);
                spin_lock_irq(lock);
        }

        sig->notify_count = count;
        sig->group_exit_task = tsk;
        while (atomic_read(&sig->count) > count) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
                schedule();
                spin_lock_irq(lock);
        }
        spin_unlock_irq(lock);

        /*
         * At this point all other threads have exited, all we have to
         * do is to wait for the thread group leader to become inactive,
         * and to assume its PID:
         */
        if (!thread_group_leader(tsk)) {
                leader = tsk->group_leader;

                sig->notify_count = -1;
                for (;;) {
                        write_lock_irq(&tasklist_lock);
                        if (likely(leader->exit_state))
                                break;
                        __set_current_state(TASK_UNINTERRUPTIBLE);
                        write_unlock_irq(&tasklist_lock);
                        schedule();
                }

                /*
                 * The only record we have of the real-time age of a
                 * process, regardless of execs it's done, is start_time.
                 * All the past CPU time is accumulated in signal_struct
                 * from sister threads now dead.  But in this non-leader
                 * exec, nothing survives from the original leader thread,
                 * whose birth marks the true age of this process now.
                 * When we take on its identity by switching to its PID, we
                 * also take its birthdate (always earlier than our own).
                 */
                tsk->start_time = leader->start_time;

                BUG_ON(!same_thread_group(leader, tsk));
                BUG_ON(has_group_leader_pid(tsk));
                /*
                 * An exec() starts a new thread group with the
                 * TGID of the previous thread group. Rehash the
                 * two threads with a switched PID, and release
                 * the former thread group leader:
                 */

                /* Become a process group leader with the old leader's pid.
                 * The old leader becomes a thread of this thread group.
                 * Note: The old leader also uses this pid until release_task
                 *       is called.  Odd but simple and correct.
                 */
                detach_pid(tsk, PIDTYPE_PID);
                tsk->pid = leader->pid;
                attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
                transfer_pid(leader, tsk, PIDTYPE_PGID);
                transfer_pid(leader, tsk, PIDTYPE_SID);
                list_replace_rcu(&leader->tasks, &tsk->tasks);

                tsk->group_leader = tsk;
                leader->group_leader = tsk;

                tsk->exit_signal = SIGCHLD;

                BUG_ON(leader->exit_state != EXIT_ZOMBIE);
                leader->exit_state = EXIT_DEAD;

                write_unlock_irq(&tasklist_lock);
        }

        sig->group_exit_task = NULL;
        sig->notify_count = 0;
        /*
         * There may be one thread left which is just exiting,
         * but it's safe to stop telling the group to kill themselves.
         */
        sig->flags = 0;

no_thread_group:
        exit_itimers(sig);
        if (leader)
                release_task(leader);

        if (atomic_read(&oldsighand->count) != 1) {
                struct sighand_struct *newsighand;
                /*
                 * This ->sighand is shared with the CLONE_SIGHAND
                 * but not CLONE_THREAD task, switch to the new one.
                 */
                newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
                if (!newsighand)
                        return -ENOMEM;

                atomic_set(&newsighand->count, 1);
                memcpy(newsighand->action, oldsighand->action,
                       sizeof(newsighand->action));

                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
                rcu_assign_pointer(tsk->sighand, newsighand);
                spin_unlock(&oldsighand->siglock);
                write_unlock_irq(&tasklist_lock);

                __cleanup_sighand(oldsighand);
        }

        BUG_ON(!thread_group_leader(tsk));
        return 0;
}

/*
 * These functions flush out all traces of the currently running executable
 * so that a new one can be started
 */
static void flush_old_files(struct files_struct * files)
{
        long j = -1;
        struct fdtable *fdt;

        spin_lock(&files->file_lock);
        for (;;) {
                unsigned long set, i;

                j++;
                i = j * __NFDBITS;
                fdt = files_fdtable(files);
                if (i >= fdt->max_fds)
                        break;
                set = fdt->close_on_exec->fds_bits[j];
                if (!set)
                        continue;
                fdt->close_on_exec->fds_bits[j] = 0;
                spin_unlock(&files->file_lock);
                for ( ; set ; i++, set >>= 1) {
                        if (set & 1) {
                                sys_close(i);
                        }
                }
                spin_lock(&files->file_lock);

        }
        spin_unlock(&files->file_lock);
}

void get_task_comm(char *buf, struct task_struct *tsk)
{
        /* buf must be at least sizeof(tsk->comm) in size */
        task_lock(tsk);
        strncpy(buf, tsk->comm, sizeof(tsk->comm));
        task_unlock(tsk);
}

void set_task_comm(struct task_struct *tsk, char *buf)
{
        task_lock(tsk);
        strlcpy(tsk->comm, buf, sizeof(tsk->comm));
        task_unlock(tsk);
}
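
/*
 * Illustrative only: from userspace, the task comm manipulated by
 * get_task_comm()/set_task_comm() is reachable through prctl(2). A
 * hypothetical standalone sketch:
 */
#if 0
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
        char name[17] = "";     /* comm is at most 16 bytes including the NUL */

        prctl(PR_SET_NAME, "demo-comm");        /* analogous to set_task_comm() */
        prctl(PR_GET_NAME, name);               /* analogous to get_task_comm() */
        printf("comm = %s\n", name);            /* prints "demo-comm" */
        return 0;
}
#endif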

int flush_old_exec(struct linux_binprm * bprm)
{
        char * name;
        int i, ch, retval;
        struct files_struct *files;
        char tcomm[sizeof(current->comm)];

        /*
         * Make sure we have a private signal table and that
         * we are unassociated from the previous thread group.
         */
        retval = de_thread(current);
        if (retval)
                goto out;

        /*
         * Make sure we have private file handles. Ask the
         * fork helper to do the work for us and the exit
         * helper to do the cleanup of the old one.
         */
        files = current->files;         /* refcounted so safe to hold */
        retval = unshare_files();
        if (retval)
                goto out;
        /*
         * Release all of the old mmap stuff
         */
        retval = exec_mmap(bprm->mm);
        if (retval)
                goto mmap_failed;

        bprm->mm = NULL;                /* We're using it now */

        /* This is the point of no return */
        put_files_struct(files);

        current->sas_ss_sp = current->sas_ss_size = 0;

        if (current->euid == current->uid && current->egid == current->gid)
                set_dumpable(current->mm, 1);
        else
                set_dumpable(current->mm, suid_dumpable);

        name = bprm->filename;

        /* Copy the binary name from after the last slash */
        for (i = 0; (ch = *(name++)) != '\0';) {
                if (ch == '/')
                        i = 0; /* overwrite what we wrote */
                else
                        if (i < (sizeof(tcomm) - 1))
                                tcomm[i++] = ch;
        }
        tcomm[i] = '\0';
        set_task_comm(current, tcomm);

        current->flags &= ~PF_RANDOMIZE;
        flush_thread();

        /* Set the new mm task size. We have to do that late because it may
         * depend on TIF_32BIT which is only updated in flush_thread() on
         * some architectures like powerpc
         */
        current->mm->task_size = TASK_SIZE;

        if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
                suid_keys(current);
                set_dumpable(current->mm, suid_dumpable);
                current->pdeath_signal = 0;
        } else if (file_permission(bprm->file, MAY_READ) ||
                        (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
                suid_keys(current);
                set_dumpable(current->mm, suid_dumpable);
        }

        /* An exec changes our domain. We are no longer part of the thread
           group */

        current->self_exec_id++;

        flush_signal_handlers(current, 0);
        flush_old_files(current->files);

        return 0;

mmap_failed:
        reset_files_struct(current, files);
out:
        return retval;
}

EXPORT_SYMBOL(flush_old_exec);
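
/*
 * Illustrative only: a hypothetical userspace analogue of the tcomm loop
 * above, which keeps only what follows the last '/' and truncates to the
 * comm buffer size (16 bytes including the NUL).
 */
#if 0
#include <stdio.h>

int main(void)
{
        const char *name = "/usr/local/bin/some-long-program-name";
        char tcomm[16];         /* mirrors sizeof(current->comm) */
        int i, ch;

        for (i = 0; (ch = *(name++)) != '\0';) {
                if (ch == '/')
                        i = 0;                          /* restart after each slash */
                else if (i < (int)sizeof(tcomm) - 1)
                        tcomm[i++] = ch;                /* keep at most 15 chars */
        }
        tcomm[i] = '\0';
        printf("%s\n", tcomm);  /* prints "some-long-progr" */
        return 0;
}
#endif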

/*
 * Fill the binprm structure from the inode.
 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
 */
int prepare_binprm(struct linux_binprm *bprm)
{
        int mode;
        struct inode * inode = bprm->file->f_path.dentry->d_inode;
        int retval;

        mode = inode->i_mode;
        if (bprm->file->f_op == NULL)
                return -EACCES;

        bprm->e_uid = current->euid;
        bprm->e_gid = current->egid;

        if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
                /* Set-uid? */
                if (mode & S_ISUID) {
                        current->personality &= ~PER_CLEAR_ON_SETID;
                        bprm->e_uid = inode->i_uid;
                }

                /* Set-gid? */
                /*
                 * If setgid is set but no group execute bit then this
                 * is a candidate for mandatory locking, not a setgid
                 * executable.
                 */
                if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
                        current->personality &= ~PER_CLEAR_ON_SETID;
                        bprm->e_gid = inode->i_gid;
                }
        }

        /* fill in binprm security blob */
        retval = security_bprm_set(bprm);
        if (retval)
                return retval;

        memset(bprm->buf, 0, BINPRM_BUF_SIZE);
        return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
}

EXPORT_SYMBOL(prepare_binprm);
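
/*
 * Illustrative only: the setuid/setgid tests above can be reproduced from
 * userspace with stat(2), including the same subtlety that S_ISGID without
 * group execute marks mandatory locking, not a setgid executable. A
 * hypothetical sketch:
 */
#if 0
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "/usr/bin/passwd";
        struct stat st;

        if (stat(path, &st) != 0)
                return 1;
        if (st.st_mode & S_ISUID)
                printf("%s would exec with e_uid = %u\n",
                       path, (unsigned)st.st_uid);
        if ((st.st_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
                printf("%s would exec with e_gid = %u\n",
                       path, (unsigned)st.st_gid);
        return 0;
}
#endif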

static int unsafe_exec(struct task_struct *p)
{
        int unsafe = 0;
        if (p->ptrace & PT_PTRACED) {
                if (p->ptrace & PT_PTRACE_CAP)
                        unsafe |= LSM_UNSAFE_PTRACE_CAP;
                else
                        unsafe |= LSM_UNSAFE_PTRACE;
        }
        if (atomic_read(&p->fs->count) > 1 ||
            atomic_read(&p->files->count) > 1 ||
            atomic_read(&p->sighand->count) > 1)
                unsafe |= LSM_UNSAFE_SHARE;

        return unsafe;
}

void compute_creds(struct linux_binprm *bprm)
{
        int unsafe;

        if (bprm->e_uid != current->uid) {
                suid_keys(current);
                current->pdeath_signal = 0;
        }
        exec_keys(current);

        task_lock(current);
        unsafe = unsafe_exec(current);
        security_bprm_apply_creds(bprm, unsafe);
        task_unlock(current);
        security_bprm_post_apply_creds(bprm);
}
EXPORT_SYMBOL(compute_creds);

/*
 * Arguments are '\0' separated strings found at the location bprm->p
 * points to; chop off the first by relocating bprm->p to right after
 * the first '\0' encountered.
 */
int remove_arg_zero(struct linux_binprm *bprm)
{
        int ret = 0;
        unsigned long offset;
        char *kaddr;
        struct page *page;

        if (!bprm->argc)
                return 0;

        do {
                offset = bprm->p & ~PAGE_MASK;
                page = get_arg_page(bprm, bprm->p, 0);
                if (!page) {
                        ret = -EFAULT;
                        goto out;
                }
                kaddr = kmap_atomic(page, KM_USER0);

                for (; offset < PAGE_SIZE && kaddr[offset];
                                offset++, bprm->p++)
                        ;

                kunmap_atomic(kaddr, KM_USER0);
                put_arg_page(page);

                if (offset == PAGE_SIZE)
                        free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
        } while (offset == PAGE_SIZE);

        bprm->p++;
        bprm->argc--;
        ret = 0;

out:
        return ret;
}
EXPORT_SYMBOL(remove_arg_zero);

/*
 * cycle through the list of binary format handlers, until one recognizes
 * the image
 */
int search_binary_handler(struct linux_binprm *bprm, struct pt_regs *regs)
{
        int try, retval;
        struct linux_binfmt *fmt;
#ifdef __alpha__
        /* handle /sbin/loader.. */
        {
            struct exec * eh = (struct exec *) bprm->buf;

            if (!bprm->loader && eh->fh.f_magic == 0x183 &&
                (eh->fh.f_flags & 0x3000) == 0x3000)
            {
                struct file * file;
                unsigned long loader;

                allow_write_access(bprm->file);
                fput(bprm->file);
                bprm->file = NULL;

                loader = bprm->vma->vm_end - sizeof(void *);

                file = open_exec("/sbin/loader");
                retval = PTR_ERR(file);
                if (IS_ERR(file))
                        return retval;

                /* Remember if the application is TASO.  */
                bprm->sh_bang = eh->ah.entry < 0x100000000UL;

                bprm->file = file;
                bprm->loader = loader;
                retval = prepare_binprm(bprm);
                if (retval < 0)
                        return retval;
                /* should call search_binary_handler recursively here,
                   but it does not matter */
            }
        }
#endif
        retval = security_bprm_check(bprm);
        if (retval)
                return retval;

        /* kernel module loader fixup */
        /* so we don't try to run modprobe in kernel space. */
        set_fs(USER_DS);

        retval = audit_bprm(bprm);
        if (retval)
                return retval;

        retval = -ENOENT;
        for (try = 0; try < 2; try++) {
                read_lock(&binfmt_lock);
                list_for_each_entry(fmt, &formats, lh) {
                        int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
                        if (!fn)
                                continue;
                        if (!try_module_get(fmt->module))
                                continue;
                        read_unlock(&binfmt_lock);
                        retval = fn(bprm, regs);
                        if (retval >= 0) {
                                put_binfmt(fmt);
                                allow_write_access(bprm->file);
                                if (bprm->file)
                                        fput(bprm->file);
                                bprm->file = NULL;
                                current->did_exec = 1;
                                proc_exec_connector(current);
                                return retval;
                        }
                        read_lock(&binfmt_lock);
                        put_binfmt(fmt);
                        if (retval != -ENOEXEC || bprm->mm == NULL)
                                break;
                        if (!bprm->file) {
                                read_unlock(&binfmt_lock);
                                return retval;
                        }
                }
                read_unlock(&binfmt_lock);
                if (retval != -ENOEXEC || bprm->mm == NULL) {
                        break;
#ifdef CONFIG_KMOD
                } else {
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
                        if (printable(bprm->buf[0]) &&
                            printable(bprm->buf[1]) &&
                            printable(bprm->buf[2]) &&
                            printable(bprm->buf[3]))
                                break; /* -ENOEXEC */
                        request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
#endif
                }
        }
        return retval;
}

EXPORT_SYMBOL(search_binary_handler);
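
/*
 * Illustrative only: the CONFIG_KMOD fallback above asks for a
 * "binfmt-%04x" module keyed on bytes 2-3 of the image unless the first
 * four bytes look like text. A hypothetical userspace sketch of the same
 * decision, fed the start of an ELF image:
 */
#if 0
#include <stdio.h>

#define printable(c) (((c) == '\t') || ((c) == '\n') || (0x20 <= (c) && (c) <= 0x7e))

int main(void)
{
        /* First bytes of an ELF image: 0x7f 'E' 'L' 'F'. */
        unsigned char buf[4] = { 0x7f, 'E', 'L', 'F' };

        if (printable(buf[0]) && printable(buf[1]) &&
            printable(buf[2]) && printable(buf[3]))
                printf("looks like text: give up with ENOEXEC\n");
        else
                printf("would request module binfmt-%04x\n",
                       *(unsigned short *)&buf[2]);
        return 0;
}
#endif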

/*
 * sys_execve() executes a new program.
 */
int do_execve(char * filename,
        char __user *__user *argv,
        char __user *__user *envp,
        struct pt_regs * regs)
{
        struct linux_binprm *bprm;
        struct file *file;
        unsigned long env_p;
        int retval;

        retval = -ENOMEM;
        bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
        if (!bprm)
                goto out_ret;

        file = open_exec(filename);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
                goto out_kfree;

        sched_exec();

        bprm->file = file;
        bprm->filename = filename;
        bprm->interp = filename;

        retval = bprm_mm_init(bprm);
        if (retval)
                goto out_file;

        bprm->argc = count(argv, MAX_ARG_STRINGS);
        if ((retval = bprm->argc) < 0)
                goto out_mm;

        bprm->envc = count(envp, MAX_ARG_STRINGS);
        if ((retval = bprm->envc) < 0)
                goto out_mm;

        retval = security_bprm_alloc(bprm);
        if (retval)
                goto out;

        retval = prepare_binprm(bprm);
        if (retval < 0)
                goto out;

        retval = copy_strings_kernel(1, &bprm->filename, bprm);
        if (retval < 0)
                goto out;

        bprm->exec = bprm->p;
        retval = copy_strings(bprm->envc, envp, bprm);
        if (retval < 0)
                goto out;

        env_p = bprm->p;
        retval = copy_strings(bprm->argc, argv, bprm);
        if (retval < 0)
                goto out;
        bprm->argv_len = env_p - bprm->p;

        retval = search_binary_handler(bprm, regs);
        if (retval >= 0) {
                /* execve success */
                free_arg_pages(bprm);
                security_bprm_free(bprm);
                acct_update_integrals(current);
                kfree(bprm);
                return retval;
        }

out:
        free_arg_pages(bprm);
        if (bprm->security)
                security_bprm_free(bprm);

out_mm:
        if (bprm->mm)
                mmput(bprm->mm);

out_file:
        if (bprm->file) {
                allow_write_access(bprm->file);
                fput(bprm->file);
        }
out_kfree:
        kfree(bprm);

out_ret:
        return retval;
}
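
/*
 * Illustrative only: do_execve() is the kernel side of execve(2). A
 * hypothetical userspace sketch exercising the same argv/envp vectors the
 * code above counts and copies:
 */
#if 0
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char *argv[] = { "echo", "hello", NULL };
        char *envp[] = { "GREETING=demo", NULL };

        execve("/bin/echo", argv, envp);        /* returns only on failure */
        perror("execve");
        return 1;
}
#endif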

int set_binfmt(struct linux_binfmt *new)
{
        struct linux_binfmt *old = current->binfmt;

        if (new) {
                if (!try_module_get(new->module))
                        return -1;
        }
        current->binfmt = new;
        if (old)
                module_put(old->module);
        return 0;
}

EXPORT_SYMBOL(set_binfmt);

/* format_corename will inspect the pattern parameter, and output a
 * name into corename, which must have space for at least
 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 */
static int format_corename(char *corename, const char *pattern, long signr)
{
        const char *pat_ptr = pattern;
        char *out_ptr = corename;
        char *const out_end = corename + CORENAME_MAX_SIZE;
        int rc;
        int pid_in_pattern = 0;
        int ispipe = 0;

        if (*pattern == '|')
                ispipe = 1;

        /* Repeat as long as we have more pattern to process and more output
           space */
        while (*pat_ptr) {
                if (*pat_ptr != '%') {
                        if (out_ptr == out_end)
                                goto out;
                        *out_ptr++ = *pat_ptr++;
                } else {
                        switch (*++pat_ptr) {
                        case 0:
                                goto out;
                        /* Double percent, output one percent */
                        case '%':
                                if (out_ptr == out_end)
                                        goto out;
                                *out_ptr++ = '%';
                                break;
                        /* pid */
                        case 'p':
                                pid_in_pattern = 1;
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%d", task_tgid_vnr(current));
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* uid */
                        case 'u':
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%d", current->uid);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* gid */
                        case 'g':
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%d", current->gid);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* signal that caused the coredump */
                        case 's':
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%ld", signr);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* UNIX time of coredump */
                        case 't': {
                                struct timeval tv;
                                do_gettimeofday(&tv);
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%lu", tv.tv_sec);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        }
                        /* hostname */
                        case 'h':
                                down_read(&uts_sem);
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%s", utsname()->nodename);
                                up_read(&uts_sem);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* executable */
                        case 'e':
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%s", current->comm);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        /* core limit size */
                        case 'c':
                                rc = snprintf(out_ptr, out_end - out_ptr,
                                              "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
                                break;
                        default:
                                break;
                        }
                        ++pat_ptr;
                }
        }
        /* Backward compatibility with core_uses_pid:
         *
         * If core_pattern does not include a %p (as is the default)
         * and core_uses_pid is set, then .%pid will be appended to
         * the filename. Do not do this for piped commands. */
        if (!ispipe && !pid_in_pattern
            && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
                rc = snprintf(out_ptr, out_end - out_ptr,
                              ".%d", task_tgid_vnr(current));
                if (rc > out_end - out_ptr)
                        goto out;
                out_ptr += rc;
        }
out:
        *out_ptr = 0;
        return ispipe;
}
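
/*
 * Illustrative only: a hypothetical userspace model of a few core_pattern
 * specifiers (%%, %e, %p), enough to see how "core.%e.%p" expands. Only a
 * subset of the cases handled above, with getpid() standing in for
 * task_tgid_vnr(current):
 */
#if 0
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *pattern = "core.%e.%p";
        const char *comm = "demo";      /* stand-in for current->comm */
        const char *p;
        char out[128];
        char *o = out;

        for (p = pattern; *p; p++) {
                if (*p != '%') {
                        *o++ = *p;
                        continue;
                }
                switch (*++p) {
                case '\0':                      /* pattern ended on '%' */
                        goto done;
                case '%': *o++ = '%'; break;
                case 'e': o += sprintf(o, "%s", comm); break;
                case 'p': o += sprintf(o, "%d", (int)getpid()); break;
                default: break;
                }
        }
done:
        *o = '\0';
        printf("%s\n", out);    /* e.g. "core.demo.1234" */
        return 0;
}
#endif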
1525
 
1526
static void zap_process(struct task_struct *start)
1527
{
1528
        struct task_struct *t;
1529
 
1530
        start->signal->flags = SIGNAL_GROUP_EXIT;
1531
        start->signal->group_stop_count = 0;
1532
 
1533
        t = start;
1534
        do {
1535
                if (t != current && t->mm) {
1536
                        t->mm->core_waiters++;
1537
                        sigaddset(&t->pending.signal, SIGKILL);
1538
                        signal_wake_up(t, 1);
1539
                }
1540
        } while ((t = next_thread(t)) != start);
1541
}

static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
				int exit_code)
{
	struct task_struct *g, *p;
	unsigned long flags;
	int err = -EAGAIN;

	spin_lock_irq(&tsk->sighand->siglock);
	if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
		tsk->signal->group_exit_code = exit_code;
		zap_process(tsk);
		err = 0;
	}
	spin_unlock_irq(&tsk->sighand->siglock);
	if (err)
		return err;

	if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
		goto done;

	rcu_read_lock();
	for_each_process(g) {
		if (g == tsk->group_leader)
			continue;

		p = g;
		do {
			if (p->mm) {
				if (p->mm == mm) {
					/*
					 * p->sighand can't disappear, but
					 * may be changed by de_thread()
					 */
					lock_task_sighand(p, &flags);
					zap_process(p);
					unlock_task_sighand(p, &flags);
				}
				break;
			}
		} while ((p = next_thread(p)) != g);
	}
	rcu_read_unlock();
done:
	return mm->core_waiters;
}

static int coredump_wait(int exit_code)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct completion startup_done;
	struct completion *vfork_done;
	int core_waiters;

	init_completion(&mm->core_done);
	init_completion(&startup_done);
	mm->core_startup_done = &startup_done;

	core_waiters = zap_threads(tsk, mm, exit_code);
	up_write(&mm->mmap_sem);

	if (unlikely(core_waiters < 0))
		goto fail;

	/*
	 * Make sure nobody is waiting for us to release the VM,
	 * otherwise we can deadlock when we wait on each other
	 */
	vfork_done = tsk->vfork_done;
	if (vfork_done) {
		tsk->vfork_done = NULL;
		complete(vfork_done);
	}

	if (core_waiters)
		wait_for_completion(&startup_done);
fail:
	BUG_ON(mm->core_waiters);
	return core_waiters;
}
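
/*
 * [Editor's note: illustrative sketch, not part of exec.c.]  The
 * startup_done completion above is a rendezvous: each task zapped by
 * zap_threads() is expected to check in from the exit path (outside this
 * file), and the last one wakes the dumper.  A rough userspace analogue
 * with a pthread condition variable; the thread count and names are
 * invented for the sketch.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  startup_done = PTHREAD_COND_INITIALIZER;
static int core_waiters = 3;		/* threads still to park */

static void *other_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (--core_waiters == 0)
		pthread_cond_signal(&startup_done);	/* complete() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	int i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, other_thread, NULL);

	pthread_mutex_lock(&lock);
	while (core_waiters)			/* wait_for_completion() */
		pthread_cond_wait(&startup_done, &lock);
	pthread_mutex_unlock(&lock);

	printf("all threads parked; safe to dump\n");
	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}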

/*
 * set_dumpable converts the traditional three-value dumpable setting to
 * two flags and stores them in mm->flags.  It modifies the lower two bits
 * of mm->flags, but these bits are not changed atomically, so get_dumpable
 * can observe an intermediate state.  To avoid unexpected behavior,
 * get_dumpable returns either the old dumpable value or the new one,
 * relying on the order in which the bits are modified.
 *
 * dumpable |   mm->flags (binary)
 * old  new | initial interim  final
 * ---------+-----------------------
 *  0    1  |   00      01      01
 *  0    2  |   00      10(*)   11
 *  1    0  |   01      00      00
 *  1    2  |   01      11      11
 *  2    0  |   11      10(*)   00
 *  2    1  |   11      11      01
 *
 * (*) get_dumpable regards an interim value of 10 as 11.
 */
void set_dumpable(struct mm_struct *mm, int value)
{
	switch (value) {
	case 0:
		clear_bit(MMF_DUMPABLE, &mm->flags);
		smp_wmb();
		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
		break;
	case 1:
		set_bit(MMF_DUMPABLE, &mm->flags);
		smp_wmb();
		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
		break;
	case 2:
		set_bit(MMF_DUMP_SECURELY, &mm->flags);
		smp_wmb();
		set_bit(MMF_DUMPABLE, &mm->flags);
		break;
	}
}

int get_dumpable(struct mm_struct *mm)
{
	int ret;

	ret = mm->flags & 0x3;
	return (ret >= 2) ? 2 : ret;
}
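
/*
 * [Editor's note: illustrative sketch, not part of exec.c.]  A userspace
 * model of the two-bit encoding, checked against the table above.  The bit
 * positions (MMF_DUMPABLE = bit 0, MMF_DUMP_SECURELY = bit 1) are chosen
 * to be consistent with get_dumpable()'s 0x3 mask; the harness verifies
 * only the final column -- the point of the (*) rows is that a reader
 * catching the interim 10 still sees mode 2, the secure side.
 */
#include <assert.h>
#include <stdio.h>

#define MMF_DUMPABLE      0
#define MMF_DUMP_SECURELY 1

static unsigned long mm_flags;

static void model_set_dumpable(int value)
{
	switch (value) {
	case 0:
		mm_flags &= ~(1UL << MMF_DUMPABLE);
		mm_flags &= ~(1UL << MMF_DUMP_SECURELY);
		break;
	case 1:
		mm_flags |= (1UL << MMF_DUMPABLE);
		mm_flags &= ~(1UL << MMF_DUMP_SECURELY);
		break;
	case 2:
		mm_flags |= (1UL << MMF_DUMP_SECURELY);
		mm_flags |= (1UL << MMF_DUMPABLE);
		break;
	}
}

static int model_get_dumpable(void)
{
	int ret = mm_flags & 0x3;
	return (ret >= 2) ? 2 : ret;	/* interim 10 reads as 2 */
}

int main(void)
{
	int v;

	for (v = 0; v <= 2; v++) {
		model_set_dumpable(v);
		assert(model_get_dumpable() == v);
	}
	puts("encoding matches the table");
	return 0;
}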

int do_coredump(long signr, int exit_code, struct pt_regs * regs)
{
	char corename[CORENAME_MAX_SIZE + 1];
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	struct inode * inode;
	struct file * file;
	int retval = 0;
	int fsuid = current->fsuid;
	int flag = 0;
	int ispipe = 0;
	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	char **helper_argv = NULL;
	int helper_argc = 0;
	char *delimit;

	audit_core_dumps(signr);

	binfmt = current->binfmt;
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	down_write(&mm->mmap_sem);
	/*
	 * If another thread got here first, or we are not dumpable, bail out.
	 */
	if (mm->core_waiters || !get_dumpable(mm)) {
		up_write(&mm->mmap_sem);
		goto fail;
	}

	/*
	 *	We cannot trust fsuid as being the "true" uid of the
	 *	process, nor do we know its entire history. We only know it
	 *	was tainted, so we dump it as root in mode 2.
	 */
	if (get_dumpable(mm) == 2) {	/* Setuid core dump mode */
		flag = O_EXCL;		/* Stop rewrite attacks */
		current->fsuid = 0;	/* Dump as root, private */
	}

	retval = coredump_wait(exit_code);
	if (retval < 0)
		goto fail;

	/*
	 * Clear any false indication of pending signals that might
	 * be seen by the filesystem code called to write the core file.
	 */
	clear_thread_flag(TIF_SIGPENDING);

	/*
	 * lock_kernel() because format_corename() is controlled by sysctl,
	 * which uses lock_kernel()
	 */
	lock_kernel();
	ispipe = format_corename(corename, core_pattern, signr);
	unlock_kernel();
	/*
	 * Don't bother to check the RLIMIT_CORE value if core_pattern points
	 * to a pipe.  Since we're not writing directly to the filesystem,
	 * RLIMIT_CORE doesn't really apply: no actual core file will be
	 * created unless the pipe reader chooses to write out the core file,
	 * at which point file size limits and permissions will be imposed
	 * as they are for any other process.
	 */
	if ((!ispipe) && (core_limit < binfmt->min_coredump))
		goto fail_unlock;

	if (ispipe) {
		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
		/* Terminate the string before the first option */
		delimit = strchr(corename, ' ');
		if (delimit)
			*delimit = '\0';
		delimit = strrchr(helper_argv[0], '/');
		if (delimit)
			delimit++;
		else
			delimit = helper_argv[0];
		if (!strcmp(delimit, current->comm)) {
			printk(KERN_NOTICE "Recursive core dump detected, "
					"aborting\n");
			goto fail_unlock;
		}

		core_limit = RLIM_INFINITY;

		/* SIGPIPE can happen, but it's just never processed */
		if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
				&file)) {
			printk(KERN_INFO "Core dump to %s pipe failed\n",
			       corename);
			goto fail_unlock;
		}
	} else
		file = filp_open(corename,
				 O_CREAT | 2 /* O_RDWR */ | O_NOFOLLOW | O_LARGEFILE | flag,
				 0600);
	if (IS_ERR(file))
		goto fail_unlock;
	inode = file->f_path.dentry->d_inode;
	if (inode->i_nlink > 1)
		goto close_fail;	/* multiple links - don't dump */
	if (!ispipe && d_unhashed(file->f_path.dentry))
		goto close_fail;

	/* AK: actually I see no reason not to allow this for named pipes etc.,
	   but keep the previous behaviour for now. */
	if (!ispipe && !S_ISREG(inode->i_mode))
		goto close_fail;
	/*
	 * Don't allow local users to get cute and trick others into
	 * coredumping into their pre-created files:
	 */
	if (inode->i_uid != current->fsuid)
		goto close_fail;
	if (!file->f_op)
		goto close_fail;
	if (!file->f_op->write)
		goto close_fail;
	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
		goto close_fail;

	retval = binfmt->core_dump(signr, regs, file, core_limit);

	if (retval)
		current->signal->group_exit_code |= 0x80;
close_fail:
	filp_close(file, NULL);
fail_unlock:
	if (helper_argv)
		argv_free(helper_argv);

	current->fsuid = fsuid;
	complete_all(&mm->core_done);
fail:
	return retval;
}
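
/*
 * [Editor's note: illustrative sketch, not part of exec.c.]  When
 * core_pattern begins with '|', call_usermodehelper_pipe() above spawns
 * the named helper with the core image on its stdin.  A minimal helper,
 * e.g. for core_pattern = "|/usr/local/bin/corecatch %p" -- the path,
 * the %p argument, and the output location are all invented:
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[256], buf[8192];
	size_t n;
	FILE *out;

	snprintf(path, sizeof(path), "/var/tmp/core.%s",
		 argc > 1 ? argv[1] : "unknown");
	out = fopen(path, "w");
	if (!out)
		return 1;
	while ((n = fread(buf, 1, sizeof(buf), stdin)) > 0)
		fwrite(buf, 1, n, out);		/* drain the core image */
	fclose(out);
	return 0;
}
/*
 * Note the recursion check in do_coredump() above: if the helper itself
 * crashed (its basename matching current->comm), the dump is aborted
 * rather than looping.
 */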
