diff options
Diffstat (limited to 'arch/um/kernel')
45 files changed, 1255 insertions, 1163 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index a8918e80df96..614b8ebeb0ed 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -8,25 +8,24 @@ clean-files := obj-y = config.o exec_kern.o exitcode.o \ helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \ - physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ - sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ - syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \ - tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \ - user_util.o + physmem.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ + sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o \ + tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \ + uaccess_user.o um_arch.o umid.o user_util.o obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_TTY_LOG) += tty_log.o -obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o +obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o obj-$(CONFIG_MODE_TT) += tt/ obj-$(CONFIG_MODE_SKAS) += skas/ user-objs-$(CONFIG_TTY_LOG) += tty_log.o -USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \ - time.o tty_log.o umid.o user_util.o +USER_OBJS := $(user-objs-y) config.o helper.o main.o tempfile.o time.o \ + tty_log.o umid.o user_util.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c new file mode 100644 index 000000000000..c13a64a288f6 --- /dev/null +++ b/arch/um/kernel/asm-offsets.c @@ -0,0 +1 @@ +/* Dummy file to make kbuild happy - unused! */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715b0838a68c..2517ecb8bf27 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -67,6 +67,12 @@ SECTIONS *(.stub .text.* .gnu.linkonce.t.*) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) + + . 
= ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } =0x90909090 .fini : { KEEP (*(.fini)) @@ -140,37 +146,8 @@ SECTIONS } _end = .; PROVIDE (end = .); - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + + STABS_DEBUG + + DWARF_DEBUG } diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 0ea87f24b36f..d21ebad666b4 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -48,7 +48,7 @@ static int make_proc_exitcode(void) ent = create_proc_entry("exitcode", 0600, &proc_root); if(ent == NULL){ - printk("make_proc_exitcode : Failed to register " + printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); return(0); } diff --git a/arch/um/kernel/helper.c 
b/arch/um/kernel/helper.c index 13b1f5c2f7ee..f83e1e8e2392 100644 --- a/arch/um/kernel/helper.c +++ b/arch/um/kernel/helper.c @@ -13,6 +13,7 @@ #include "user.h" #include "kern_util.h" #include "user_util.h" +#include "helper.h" #include "os.h" struct helper_data { @@ -149,7 +150,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, return(pid); } -int helper_wait(int pid, int block) +int helper_wait(int pid) { int ret; @@ -160,14 +161,3 @@ int helper_wait(int pid, int block) } return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 9f18061ef4c9..dcd814971995 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -31,7 +31,7 @@ #include "kern_util.h" #include "irq_user.h" #include "irq_kern.h" - +#include "os.h" /* * Generic, controller-independent functions: @@ -168,13 +168,32 @@ void __init init_IRQ(void) } } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *, + struct pt_regs *)) +{ + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err){ + printk("init_aio_irq - os_pipe failed, err = %d\n", -err); + goto out; + } + + err = um_request_irq(irq, fds[0], IRQ_READ, handler, + SA_INTERRUPT | SA_SAMPLE_RANDOM, name, + (void *) (long) fds[0]); + if(err){ + printk("init_aio_irq - : um_request_irq failed, err = %d\n", + err); + goto out_close; + } + + err = fds[1]; + goto out; + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + out: + return(err); +} diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 99439fa15ef4..a97a72e516aa 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -34,14 +34,9 @@ EXPORT_SYMBOL(host_task_size); EXPORT_SYMBOL(arch_validate); EXPORT_SYMBOL(get_kmem_end); -EXPORT_SYMBOL(page_to_phys); -EXPORT_SYMBOL(phys_to_page); EXPORT_SYMBOL(high_physmem); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(um_virt_to_phys); -EXPORT_SYMBOL(__virt_to_page); -EXPORT_SYMBOL(to_phys); -EXPORT_SYMBOL(to_virt); EXPORT_SYMBOL(mode_tt); EXPORT_SYMBOL(handle_page_fault); EXPORT_SYMBOL(find_iomem); @@ -114,22 +109,3 @@ extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__read_lock_failed); #endif - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index 1e1a87f1c510..d31027f0fe39 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -97,7 +97,7 @@ int main(int argc, char **argv, char **envp) exit(1); } -#ifdef UML_CONFIG_MODE_TT +#ifdef UML_CONFIG_CMDLINE_ON_HOST /* Allocate memory for thread command lines */ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 5597bd39e6b5..64fa062cc119 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -196,7 +196,7 @@ static void init_highmem(void) static void __init fixaddr_user_init( void) { -#if CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pgd_t *pgd; pud_t *pud; diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 420e6d51fa0f..ea670fcc8af5 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -248,16 +248,6 @@ unsigned long high_physmem; extern unsigned long physmem_size; -void *to_virt(unsigned long phys) -{ - return((void *) uml_physmem + phys); -} - -unsigned long to_phys(void *virt) -{ - return(((unsigned long) virt) - uml_physmem); -} - int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) { struct page *p, *map; @@ -298,31 +288,6 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) return(0); } -struct page *phys_to_page(const unsigned long phys) -{ - return(&mem_map[phys >> PAGE_SHIFT]); -} - -struct page *__virt_to_page(const unsigned long virt) -{ - return(&mem_map[__pa(virt) >> PAGE_SHIFT]); -} - -phys_t page_to_phys(struct page *page) -{ - return((page - mem_map) << PAGE_SHIFT); -} - -pte_t mk_pte(struct page *page, pgprot_t pgprot) -{ - pte_t pte; - - pte_set_val(pte, page_to_phys(page), 
pgprot); - if(pte_present(pte)) - pte_mknewprot(pte_mknewpage(pte)); - return(pte); -} - /* Changed during early boot */ static unsigned long kmem_top = 0; @@ -353,6 +318,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +extern int __syscall_stub_start, __binary_start; + void setup_physmem(unsigned long start, unsigned long reserve_end, unsigned long len, unsigned long highmem) { @@ -371,6 +338,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, exit(1); } + /* Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. + */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, len - bootmap_size - reserve); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c deleted file mode 100644 index 1b5ef3e96c71..000000000000 --- a/arch/um/kernel/process.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdio.h> -#include <unistd.h> -#include <signal.h> -#include <sched.h> -#include <errno.h> -#include <stdarg.h> -#include <stdlib.h> -#include <setjmp.h> -#include <sys/time.h> -#include <sys/wait.h> -#include <sys/mman.h> -#include <asm/unistd.h> -#include <asm/page.h> -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "process.h" -#include "signal_kern.h" -#include "signal_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "ptrace_user.h" -#include "time_user.h" -#include "init.h" -#include "os.h" -#include "uml-config.h" -#include "choose-mode.h" -#include "mode.h" -#ifdef UML_CONFIG_MODE_SKAS -#include "skas.h" -#include "skas_ptrace.h" -#include "registers.h" 
-#endif - -void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -{ - int flags = 0, pages; - - if(sig_stack != NULL){ - pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); - set_sigstack(sig_stack, pages * page_size()); - flags = SA_ONSTACK; - } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -} - -void init_new_thread_signals(int altstack) -{ - int flags = altstack ? SA_ONSTACK : 0; - - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGILL, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGUSR2, (__sighandler_t) sig_handler, - flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - signal(SIGHUP, SIG_IGN); - - init_irq_signals(altstack); -} - -struct tramp { - int (*tramp)(void *); - void *tramp_data; - unsigned long temp_stack; - int flags; - int pid; -}; - -/* See above for why sigkill is here */ - -int sigkill = SIGKILL; - -int outer_tramp(void *arg) -{ - struct tramp *t; - int sig = sigkill; - - t = arg; - t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, - t->flags, t->tramp_data); - if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); - kill(os_getpid(), sig); - _exit(0); -} - -int start_fork_tramp(void *thread_arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)) -{ - struct tramp arg; - unsigned long sp; - int new_pid, status, err; - - /* The trampoline will run on the temporary stack */ - sp = stack_sp(temp_stack); - - clone_flags |= CLONE_FILES | SIGCHLD; - - arg.tramp = tramp; - arg.tramp_data = thread_arg; - 
arg.temp_stack = temp_stack; - arg.flags = clone_flags; - - /* Start the process and wait for it to kill itself */ - new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); - if(new_pid < 0) - return(new_pid); - - CATCH_EINTR(err = waitpid(new_pid, &status, 0)); - if(err < 0) - panic("Waiting for outer trampoline failed - errno = %d", - errno); - - if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL, " - "status = %d", status); - - return(arg.pid); -} - -static int ptrace_child(void) -{ - int ret; - int pid = os_getpid(), ppid = getppid(); - int sc_result; - - if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ - perror("ptrace"); - os_kill_process(pid, 0); - } - os_stop_process(pid); - - /*This syscall will be intercepted by the parent. Don't call more than - * once, please.*/ - sc_result = os_getpid(); - - if (sc_result == pid) - ret = 1; /*Nothing modified by the parent, we are running - normally.*/ - else if (sc_result == ppid) - ret = 0; /*Expected in check_ptrace and check_sysemu when they - succeed in modifying the stack frame*/ - else - ret = 2; /*Serious trouble! This could be caused by a bug in - host 2.6 SKAS3/2.6 patch before release -V6, together - with a bug in the UML code itself.*/ - _exit(ret); -} - -static int start_ptraced_child(void) -{ - int pid, n, status; - - pid = fork(); - if(pid == 0) - ptrace_child(); - - if(pid < 0) - panic("check_ptrace : fork failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) - panic("check_ptrace : expected SIGSTOP, got status = %d", - status); - - return(pid); -} - -/* When testing for SYSEMU support, if it is one of the broken versions, we must - * just avoid using sysemu, not panic, but only if SYSEMU features are broken. 
- * So only for SYSEMU features we test mustpanic, while normal host features - * must work anyway!*/ -static int stop_ptraced_child(int pid, int exitcode, int mustexit) -{ - int status, n, ret = 0; - - if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("stop_ptraced_child : ptrace failed, errno = %d", errno); - CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { - int exit_with = WEXITSTATUS(status); - if (exit_with == 2) - printk("check_ptrace : child exited with status 2. " - "Serious trouble happening! Try updating your " - "host skas patch!\nDisabling SYSEMU support."); - printk("check_ptrace : child exited with exitcode %d, while " - "expecting %d; status 0x%x", exit_with, - exitcode, status); - if (mustexit) - panic("\n"); - else - printk("\n"); - ret = -1; - } - - return ret; -} - -static int force_sysemu_disabled = 0; - -static int __init nosysemu_cmd_param(char *str, int* add) -{ - force_sysemu_disabled = 1; - return 0; -} - -__uml_setup("nosysemu", nosysemu_cmd_param, - "nosysemu\n" - " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" - " SYSEMU is a performance-patch introduced by Laurent Vivier. 
It changes\n" - " behaviour of ptrace() and helps reducing host context switch rate.\n" - " To make it working, you need a kernel patch for your host, too.\n" - " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n\n"); - -static void __init check_sysemu(void) -{ - int pid, syscall, n, status, count=0; - - printk("Checking syscall emulation patch for ptrace..."); - sysemu_supported = 0; - pid = start_ptraced_child(); - - if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) - goto fail; - - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if (n < 0) - panic("check_sysemu : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_sysemu : expected SIGTRAP, " - "got status = %d", status); - - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - - if (stop_ptraced_child(pid, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported = 1; - printk("OK\n"); - set_using_sysemu(!force_sysemu_disabled); - - printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(); - while(1){ - count++; - if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) - goto fail; - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - if (!count) - panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep"); - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - break; - } - } - if (stop_ptraced_child(pid, 0, 0) < 0) - goto fail_stopped; - - sysemu_supported 
= 2; - printk("OK\n"); - - if ( !force_sysemu_disabled ) - set_using_sysemu(sysemu_supported); - return; - -fail: - stop_ptraced_child(pid, 1, 0); -fail_stopped: - printk("missing\n"); -} - -void __init check_ptrace(void) -{ - int pid, syscall, n, status; - - printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(); - - if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) - panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno); - - while(1){ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", - errno); - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80)) - panic("check_ptrace : expected SIGTRAP + 0x80, " - "got status = %d", status); - - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, - 0); - if(syscall == __NR_getpid){ - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, - __NR_getppid); - if(n < 0) - panic("check_ptrace : failed to modify system " - "call, errno = %d", errno); - break; - } - } - stop_ptraced_child(pid, 0, 1); - printk("OK\n"); - check_sysemu(); -} - -int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -{ - sigjmp_buf buf; - int n; - - *jmp_ptr = &buf; - n = sigsetjmp(buf, 1); - if(n != 0) - return(n); - (*fn)(arg); - return(0); -} - -void forward_pending_sigio(int target) -{ - sigset_t sigs; - - if(sigpending(&sigs)) - panic("forward_pending_sigio : sigpending failed"); - if(sigismember(&sigs, SIGIO)) - kill(target, SIGIO); -} - -#ifdef UML_CONFIG_MODE_SKAS -static inline int check_skas3_ptrace_support(void) -{ - struct ptrace_faultinfo fi; - int pid, n, ret = 1; - - printf("Checking for the skas3 patch in the host..."); - pid = start_ptraced_child(); - - n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); - if (n < 0) { - if(errno == EIO) - printf("not found\n"); - else { 
- perror("not found"); - } - ret = 0; - } else { - printf("found\n"); - } - - init_registers(pid); - stop_ptraced_child(pid, 1, 1); - - return(ret); -} - -int can_do_skas(void) -{ - int ret = 1; - - printf("Checking for /proc/mm..."); - if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { - printf("not found\n"); - ret = 0; - goto out; - } else { - printf("found\n"); - } - - ret = check_skas3_ptrace_support(); -out: - return ret; -} -#else -int can_do_skas(void) -{ - return(0); -} -#endif diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index d4036ed680bc..c23d8a08d0ff 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -412,7 +412,7 @@ int __init make_proc_sysemu(void) if (ent == NULL) { - printk("Failed to register /proc/sysemu\n"); + printk(KERN_WARNING "Failed to register /proc/sysemu\n"); return(0); } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index fcec51da1d37..a637e885c583 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -49,23 +49,17 @@ void machine_restart(char * __unused) CHOOSE_MODE(reboot_tt(), reboot_skas()); } -EXPORT_SYMBOL(machine_restart); - void machine_power_off(void) { uml_cleanup(); CHOOSE_MODE(halt_tt(), halt_skas()); } -EXPORT_SYMBOL(machine_power_off); - void machine_halt(void) { machine_power_off(); } -EXPORT_SYMBOL(machine_halt); - /* * Overrides for Emacs so that we follow Linus's tabbing style. 
* Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c index 7807a3e8c426..03618bd13d55 100644 --- a/arch/um/kernel/signal_kern.c +++ b/arch/um/kernel/signal_kern.c @@ -87,12 +87,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); force_sigsegv(signr, current); - } - else if(!(ka->sa.sa_flags & SA_NODEFER)){ + } else { spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - sigaddset(&current->blocked, signr); + if(!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, signr); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index ff69c4b312c0..db36c7c95940 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,11 +3,14 @@ # Licensed under the GPL # -obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ +obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ + syscall.o tlb.o trap_user.o uaccess.o subdir- := util -USER_OBJS := process.o +USER_OBJS := process.o clone.o include arch/um/scripts/Makefile.rules + +# clone.o is in the stub, so it can't be built with profiling +$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 000000000000..4dc55f10cd18 --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,44 @@ +#include <sched.h> +#include <signal.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <asm/unistd.h> +#include <asm/page.h> +#include "ptrace_user.h" +#include "skas.h" +#include "stub-data.h" +#include "uml-config.h" +#include "sysdep/stub.h" + +/* This is in a separate file because it needs to be compiled 
with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + */ +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + long err; + struct stub_data *from = (struct stub_data *) UML_CONFIG_STUB_DATA; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + UML_CONFIG_STUB_DATA + PAGE_SIZE / 2 - + sizeof(void *)); + if(err != 0) + goto out; + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if(err) + goto out; + + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + (long) &from->timer, 0); + if(err) + goto out; + + err = stub_syscall6(STUB_MMAP_NR, UML_CONFIG_STUB_DATA, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + from->fd, from->offset); + out: + /* save current result. Parent: pid; child: retcode of mmap */ + from->err = err; + trap_myself(); +} diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c index c6b4d5dba789..77ed7bbab219 100644 --- a/arch/um/kernel/skas/exec_kern.c +++ b/arch/um/kernel/skas/exec_kern.c @@ -18,7 +18,7 @@ void flush_thread_skas(void) { force_flush_all(); - switch_mm_skas(current->mm->context.skas.mm_fd); + switch_mm_skas(&current->mm->context.skas.id); } void start_thread_skas(struct pt_regs *regs, unsigned long eip, diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h new file mode 100644 index 000000000000..48dd0989ddaa --- /dev/null +++ b/arch/um/kernel/skas/include/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 4cd60d7213f3..09536f81ee42 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,10 +6,19 @@ #ifndef __SKAS_MMU_H #define 
__SKAS_MMU_H +#include "linux/config.h" +#include "mm_id.h" + struct mmu_context_skas { - int mm_fd; + struct mm_id id; + unsigned long last_page_table; +#ifdef CONFIG_3_LEVEL_PGTABLES + unsigned long last_pmd; +#endif }; +extern void switch_mm_skas(struct mm_id * mm_idp); + #endif /* diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 96b51dba3471..060934740f9f 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -6,9 +6,11 @@ #ifndef __SKAS_H #define __SKAS_H +#include "mm_id.h" #include "sysdep/ptrace.h" extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -22,26 +24,26 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset); -extern int unmap(int fd, void *addr, unsigned long len); -extern int protect(int fd, unsigned long addr, unsigned long len, - int r, int w, int x); +extern int map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int r, int w, int x, int phys_fd, + unsigned long long offset, int done, void **data); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, + int done, void **data); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x, int done, + void **data); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); -extern int new_mm(int from); -extern void start_userspace(int cpu); +extern int new_mm(int from, unsigned long stack); +extern int start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct 
faultinfo * fi); extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); +extern long run_syscall_stub(struct mm_id * mm_idp, + int syscall, unsigned long *args, long expected, + void **addr, int done); +extern long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/include/stub-data.h b/arch/um/kernel/skas/include/stub-data.h new file mode 100644 index 000000000000..f6ed92c3727d --- /dev/null +++ b/arch/um/kernel/skas/include/stub-data.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +#include <sys/time.h> + +struct stub_data { + long offset; + int fd; + struct itimerval timer; + long err; +}; + +#endif diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h index cd6c280482cb..6ee3f3902e68 100644 --- a/arch/um/kernel/skas/include/uaccess-skas.h +++ b/arch/um/kernel/skas/include/uaccess-skas.h @@ -18,18 +18,18 @@ ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))) -static inline int verify_area_skas(int type, const void * addr, +static inline int verify_area_skas(int type, const void __user * addr, unsigned long size) { return(access_ok_skas(type, addr, size) ? 
0 : -EFAULT); } -extern int copy_from_user_skas(void *to, const void *from, int n); -extern int copy_to_user_skas(void *to, const void *from, int n); -extern int strncpy_from_user_skas(char *dst, const char *src, int count); -extern int __clear_user_skas(void *mem, int len); -extern int clear_user_skas(void *mem, int len); -extern int strnlen_user_skas(const void *str, int len); +extern int copy_from_user_skas(void *to, const void __user *from, int n); +extern int copy_to_user_skas(void __user *to, const void *from, int n); +extern int strncpy_from_user_skas(char *dst, const char __user *src, int count); +extern int __clear_user_skas(void __user *mem, int len); +extern int clear_user_skas(void __user *mem, int len); +extern int strnlen_user_skas(const void __user *str, int len); #endif diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c index 438db2f43456..147466d7ff4f 100644 --- a/arch/um/kernel/skas/mem.c +++ b/arch/um/kernel/skas/mem.c @@ -5,7 +5,9 @@ #include "linux/config.h" #include "linux/mm.h" +#include "asm/pgtable.h" #include "mem_user.h" +#include "skas.h" unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, unsigned long *task_size_out) @@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, *task_size_out = CONFIG_HOST_TASK_SIZE; #else *host_size_out = top; - *task_size_out = top; + if (proc_mm && ptrace_faultinfo) + *task_size_out = top; + else *task_size_out = CONFIG_STUB_START & PGDIR_MASK; #endif return(((unsigned long) set_task_sizes_skas) & ~0xffffff); } diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index 1310bf1e88d1..1d89640bd502 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -3,100 +3,279 @@ * Licensed under the GPL */ +#include <signal.h> #include <errno.h> +#include <string.h> #include <sys/mman.h> +#include <sys/wait.h> +#include <asm/page.h> +#include <asm/unistd.h> #include "mem_user.h" #include "mem.h" 
+#include "skas.h" #include "user.h" #include "os.h" #include "proc_mm.h" +#include "ptrace_user.h" +#include "user_util.h" +#include "kern_util.h" +#include "task.h" +#include "registers.h" +#include "uml-config.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" -void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset) +extern unsigned long batch_syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid, int sig, char * fname); + +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) { - struct proc_mm_op map; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = - { .mmap = - { .addr = virt, - .len = len, - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, - .fd = phys_fd, - .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + if(stack == NULL){ + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; } -int unmap(int fd, void *addr, unsigned long len) +extern int proc_mm; + +int single_count = 0; +int multi_count = 0; +int multi_op_count = 0; + +static long do_syscall_stub(struct mm_id *mm_idp, void **addr) { - struct proc_mm_op unmap; - int n; - - unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, - .u = - { .munmap = - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); + unsigned long regs[MAX_REG_NR]; + unsigned long *data; + unsigned long *syscall; + long ret, offset; + int n, pid = mm_idp->u.pid; + + if(proc_mm) +#warning Need to look up userspace_pid by cpu + pid = userspace_pid[0]; + + multi_count++; + + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned 
long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + n = ptrace_setregs(pid, regs); + if(n < 0) + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + n); + + wait_stub_done(pid, 0, "do_syscall_stub"); + + /* When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed syscall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + + offset - UML_CONFIG_STUB_DATA); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk("do_syscall_stub: syscall %ld failed, return value = " + "0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(" syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for(n = 1; n < data[0]/sizeof(long); n++) { + if(n == 1) + printk(" additional syscall data:"); + if(n % 4 == 1) + printk("\n "); + printk(" 0x%lx", data[n]); + } + if(n > 1) + printk("\n"); } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); - return(0); + return ret; } -int protect(int fd, unsigned long addr, unsigned long len, int r, int w, - int x, int must_succeed) +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) { - struct proc_mm_op protect; - int prot, n; + unsigned long *stack = check_init_stack(mm_idp, *addr); - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ?
PROT_EXEC : 0); + if(done && *addr == NULL) + single_count++; - protect = ((struct proc_mm_op) { .op = MM_MPROTECT, - .u = - { .mprotect = - { .addr = (unsigned long) addr, - .len = len, - .prot = prot } } } ); + *stack += sizeof(long); + stack += *stack / sizeof(long); - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) { - if(n == 0) return(0); + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + multi_op_count++; - if(must_succeed) - panic("protect failed, err = %d", -n); + if(!done && ((((unsigned long) stack) & ~PAGE_MASK) < + PAGE_SIZE - 10 * sizeof(long))){ + *addr = stack; + return 0; + } - return(-EIO); + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if((((unsigned long) *addr) & ~PAGE_MASK) >= + PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if(ret) + return ret; } - return(0); + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & ~PAGE_MASK) + + UML_CONFIG_STUB_DATA); + + return 0; } -void before_mem_skas(unsigned long unused) +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset, + int done, void **data) +{ + int prot, ret; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? 
PROT_EXEC : 0); + + if(proc_mm){ + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + ret = os_write_file(fd, &map, sizeof(map)); + if(ret != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -ret); + else ret = 0; + } + else { + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); + } + + return ret; +} + +int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done, + void **data) { + int ret; + + if(proc_mm){ + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + ret = os_write_file(fd, &unmap, sizeof(unmap)); + if(ret != sizeof(unmap)) + printk("unmap - proc_mm write returned %d\n", ret); + else ret = 0; + } + else { + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + if(ret < 0) + printk("munmap stub failed, errno = %d\n", ret); + } + + return ret; } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x, int done, void **data) +{ + struct proc_mm_op protect; + int prot, ret; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? 
PROT_EXEC : 0); + + if(proc_mm){ + int fd = mm_idp->u.mm_fd; + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + ret = os_write_file(fd, &protect, sizeof(protect)); + if(ret != sizeof(protect)) + printk("protect failed, err = %d", -ret); + else ret = 0; + } + else { + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); + } + + return ret; +} + +void before_mem_skas(unsigned long unused) +{ +} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 6cb9a6d028a9..240143b616a2 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,46 +3,154 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/list.h" #include "linux/spinlock.h" #include "linux/slab.h" +#include "linux/errno.h" +#include "linux/mm.h" #include "asm/current.h" #include "asm/segment.h" #include "asm/mmu.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" #include "os.h" #include "skas.h" +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + /* There's an interaction between the skas0 stub pages, stack + * randomization, and the BUG at the end of exit_mmap. exit_mmap + * checks that the number of page tables freed is the same as had + * been allocated. If the stack is on the last page table page, + * then the stack pte page will be freed, and if not, it won't. 
To + * avoid having to know where the stack is, or if the process mapped + * something at the top of its address space for some other reason, + * we set TASK_SIZE to end at the start of the last page table. + * This keeps exit_mmap off the last page, but introduces a leak + * of that page. So, we hang onto it here and free it in + * destroy_context_skas. + */ + + mm->context.skas.last_page_table = pmd_page_kernel(*pmd); +#ifdef CONFIG_3_LEVEL_PGTABLES + mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud)); +#endif + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkexec(*pte); + *pte = pte_wrprotect(*pte); + spin_unlock(&mm->page_table_lock); + return(0); + + out_pmd: + pud_free(pud); + out_pte: + pmd_free(pmd); + out: + spin_unlock(&mm->page_table_lock); + return(-ENOMEM); +} + int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { - int from; + struct mm_struct *cur_mm = current->mm; + struct mm_id *cur_mm_id = &cur_mm->context.skas.id; + struct mm_id *mm_id = &mm->context.skas.id; + unsigned long stack = 0; + int from, ret = -ENOMEM; + + if(!proc_mm || !ptrace_faultinfo){ + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; - if((current->mm != NULL) && (current->mm != &init_mm)) - from = current->mm->context.skas.mm_fd; - else from = -1; + /* This zeros the entry that pgd_alloc didn't, needed since + * we are about to reinitialize it, and want mm.nr_ptes to + * be accurate. 
+ */ + mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0){ - printk("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); - return(mm->context.skas.mm_fd); + ret = init_stub_pte(mm, CONFIG_STUB_CODE, + (unsigned long) &__syscall_stub_start); + if(ret) + goto out_free; + + ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); + if(ret) + goto out_free; + + mm->nr_ptes--; } + mm_id->stack = stack; - return(0); + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm_id->u.mm_fd; + else from = -1; + + ret = new_mm(from, stack); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + goto out_free; + } + mm_id->u.mm_fd = ret; + } + else { + if((cur_mm != NULL) && (cur_mm != &init_mm)) + mm_id->u.pid = copy_context_skas0(stack, + cur_mm_id->u.pid); + else mm_id->u.pid = start_userspace(stack); + } + + return 0; + + out_free: + if(mm_id->stack != 0) + free_page(mm_id->stack); + out: + return ret; } void destroy_context_skas(struct mm_struct *mm) { - os_close_file(mm->context.skas.mm_fd); -} + struct mmu_context_skas *mmu = &mm->context.skas; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + if(proc_mm) + os_close_file(mmu->id.u.mm_fd); + else + os_kill_ptraced_process(mmu->id.u.pid, 1); + + if(!proc_mm || !ptrace_faultinfo){ + free_page(mmu->id.stack); + pte_free_kernel((pte_t *) mmu->last_page_table); + dec_page_state(nr_page_table_pages); +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_free((pmd_t *) mmu->last_pmd); +#endif + } +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 773cd2b525fc..5cd0e9929789 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -13,7 +13,9 @@ #include <sys/wait.h> #include <sys/mman.h> #include <sys/user.h> +#include <sys/time.h> #include <asm/unistd.h> +#include <asm/types.h> #include "user.h" #include "ptrace_user.h" #include "time_user.h" @@ -21,13 +23,18 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "stub-data.h" +#include "mm_id.h" #include "sysdep/sigcontext.h" +#include "sysdep/stub.h" #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "mem.h" +#include "uml-config.h" #include "process.h" int is_skas_winch(int pid, int fd, void *data) @@ -39,20 +46,59 @@ int is_skas_winch(int pid, int fd, void *data) return(1); } -void get_skas_faultinfo(int pid, struct faultinfo * fi) +void wait_stub_done(int pid, int sig, char * fname) { - int err; - - err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if(err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + int n, status, err; + + do { + if ( sig != -1 ) { + err = ptrace(PTRACE_CONT, pid, 0, sig); + if(err) + panic("%s : continue failed, errno = %d\n", + fname, errno); + } + sig = 
0; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + } while((n >= 0) && WIFSTOPPED(status) && + ((WSTOPSIG(status) == SIGVTALRM) || + /* running UML inside a detached screen can cause + * SIGWINCHes + */ + (WSTOPSIG(status) == SIGWINCH))); + + if((n < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){ + panic("%s : failed to wait for SIGUSR1/SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", + fname, pid, n, errno, status); + } +} - /* Special handling for i386, which has different structs */ - if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) - memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, - sizeof(struct faultinfo) - - sizeof(struct ptrace_faultinfo)); +void get_skas_faultinfo(int pid, struct faultinfo * fi) +{ + int err; + + if(ptrace_faultinfo){ + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if(err) + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo"); + + /* faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. 
+ */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + } } static void handle_segv(int pid, union uml_pt_regs * regs) @@ -91,11 +137,58 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu handle_syscall(regs); } -static int userspace_tramp(void *arg) +extern int __syscall_stub_start; +int stub_code_fd = -1; +__u64 stub_code_offset; + +static int userspace_tramp(void *stack) { - init_new_thread_signals(0); - enable_timer(); + void *addr; + ptrace(PTRACE_TRACEME, 0, 0, 0); + + init_new_thread_signals(1); + enable_timer(); + + if(!proc_mm){ + /* This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, + stub_code_fd, stub_code_offset); + if(addr == MAP_FAILED){ + printk("mapping stub code failed, errno = %d\n", + errno); + exit(1); + } + + if(stack != NULL){ + int fd; + __u64 offset; + + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping stub stack failed, " + "errno = %d\n", errno); + exit(1); + } + } + } + if(!ptrace_faultinfo){ + unsigned long v = UML_CONFIG_STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); + set_handler(SIGSEGV, (void *) v, SA_ONSTACK, + SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, + SIGUSR1, -1); + } + os_stop_process(os_getpid()); return(0); } @@ -105,11 +198,15 @@ static int userspace_tramp(void *arg) #define NR_CPUS 1 int userspace_pid[NR_CPUS]; -void start_userspace(int cpu) +int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n; + int pid, status, n, flags; + + if ( stub_code_fd == -1 ) + stub_code_fd = 
phys_mapping(to_phys(&__syscall_stub_start), + &stub_code_offset); stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -117,8 +214,9 @@ void start_userspace(int cpu) panic("start_userspace : mmap failed, errno = %d", errno); sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(userspace_tramp, (void *) sp, - CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + flags = CLONE_FILES | SIGCHLD; + if(proc_mm) flags |= CLONE_VM; + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -140,7 +238,7 @@ void start_userspace(int cpu) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid[cpu] = pid; + return(pid); } void userspace(union uml_pt_regs *regs) @@ -174,7 +272,9 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid, regs); + if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo) + user_signal(SIGSEGV, regs, pid); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -194,6 +294,7 @@ void userspace(union uml_pt_regs *regs) printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); } + pid = userspace_pid[0]; interrupt_end(); /* Avoid -ERESTARTSYS handling in host */ @@ -207,6 +308,114 @@ void userspace(union uml_pt_regs *regs) #define INIT_JMP_HALT 3 #define INIT_JMP_REBOOT 4 + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + int err; + unsigned long regs[MAX_REG_NR]; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + __u64 new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* prepare offset and fd of child's stack as argument for parent's + * and 
child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { { 0, 1000000 / hz() }, + { 0, 1000000 / hz() }})}); + get_safe_registers(regs); + + /* Set parent's instruction pointer to start of clone-stub */ + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE - + sizeof(void *); + err = ptrace_setregs(pid, regs); + if(err < 0) + panic("copy_context_skas0 : PTRACE_SETREGS failed, " + "pid = %d, errno = %d\n", pid, errno); + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + wait_stub_done(pid, 0, "copy_context_skas0"); + + pid = data->err; + if(pid < 0) + panic("copy_context_skas0 - stub-parent reports error %d\n", + pid); + + /* Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid, -1, "copy_context_skas0"); + if (child_data->err != UML_CONFIG_STUB_DATA) + panic("copy_context_skas0 - stub-child reports error %d\n", + child_data->err); + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) + panic("copy_context_skas0 : PTRACE_SETOPTIONS failed, " + "errno = %d\n", errno); + + return pid; +} + +/* + * This is used only, if proc_mm is available, while PTRACE_FAULTINFO + * isn't. 
Opening /proc/mm creates a new mm_context, which lacks the stub-pages + * Thus, we map them using /proc/mm-fd + */ +void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = stub_code_fd, + .offset = stub_code_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for code failed, " + "err = %d\n", -n); + + if ( stack ) { + __u64 map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + n = os_write_file(fd, &mmop, sizeof(mmop)); + if(n != sizeof(mmop)) + panic("map_stub_pages : /proc/mm map for data failed, " + "err = %d\n", -n); + } +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { @@ -334,21 +543,19 @@ void reboot_skas(void) siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } -void switch_mm_skas(int mm_fd) +void switch_mm_skas(struct mm_id *mm_idp) { int err; #warning need cpu pid in switch_mm_skas - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -} - -void kill_off_processes_skas(void) -{ -#warning need to loop over userspace_pids in kill_off_processes_skas - os_kill_ptraced_process(userspace_pid[0], 1); + if(proc_mm){ + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " + "errno = %d\n", errno); + } + else userspace_pid[0] = mm_idp->u.pid; } /* diff --git 
a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 0a7b8aa55db8..3d1b227226e6 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -129,7 +129,9 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, return(0); } -int new_mm(int from) +extern void map_stub_pages(int fd, unsigned long code, + unsigned long data, unsigned long stack); +int new_mm(int from, unsigned long stack) { struct proc_mm_op copy; int n, fd; @@ -148,6 +150,9 @@ int new_mm(int from) "err = %d\n", -n); } + if(!ptrace_faultinfo) + map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack); + return(fd); } @@ -175,9 +180,12 @@ static int start_kernel_proc(void *unused) return(0); } +extern int userspace_pid[]; + int start_uml_skas(void) { - start_userspace(0); + if(proc_mm) + userspace_pid[0] = start_userspace(0); init_new_thread_signals(1); @@ -199,3 +207,31 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } + +void kill_off_processes_skas(void) +{ + if(proc_mm) +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p){ + if(p->mm == NULL) + continue; + + pid = p->mm->context.skas.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +unsigned long current_stub_stack(void) +{ + if(current->mm == NULL) + return(0); + + return(current->mm->context.skas.id.stack); +} diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c new file mode 100644 index 000000000000..51fb94076fcf --- /dev/null +++ b/arch/um/kernel/skas/syscall.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/sys.h" +#include "linux/ptrace.h" +#include "asm/errno.h" +#include "asm/unistd.h" +#include "asm/ptrace.h" +#include
"asm/current.h" +#include "sysdep/syscalls.h" +#include "kern_util.h" +#include "syscall.h" + +void handle_syscall(union uml_pt_regs *r) +{ + struct pt_regs *regs = container_of(r, struct pt_regs, regs); + long result; + int syscall; +#ifdef UML_CONFIG_SYSCALL_DEBUG + int index; + + index = record_syscall_start(UPT_SYSCALL_NR(r)); +#endif + syscall_trace(r, 0); + + current->thread.nsyscalls++; + nsyscalls++; + + /* This should go in the declaration of syscall, but when I do that, + * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing + * children at all, sometimes hanging when bash doesn't see the first + * ls exit. + * The assembly looks functionally the same to me. This is + * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) + * in case it's a compiler bug. + */ + syscall = UPT_SYSCALL_NR(r); + if((syscall >= NR_syscalls) || (syscall < 0)) + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); + + REGS_SET_SYSCALL_RETURN(r->skas.regs, result); + + syscall_trace(r, 1); +#ifdef UML_CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c deleted file mode 100644 index bdf040ce5b8e..000000000000 --- a/arch/um/kernel/skas/syscall_kern.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -#include "linux/sys.h" -#include "linux/ptrace.h" -#include "asm/errno.h" -#include "asm/unistd.h" -#include "asm/ptrace.h" -#include "asm/current.h" -#include "sysdep/syscalls.h" -#include "kern_util.h" - -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_skas(void *r) -{ - struct pt_regs *regs = r; - long res; - int syscall; - - current->thread.nsyscalls++; - nsyscalls++; - syscall = UPT_SYSCALL_NR(&regs->regs); - - if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); - - return(res); -} - -/* - * Overrides for Emacs so
that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c deleted file mode 100644 index 2828e6e37721..000000000000 --- a/arch/um/kernel/skas/syscall_user.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdlib.h> -#include <signal.h> -#include "kern_util.h" -#include "uml-config.h" -#include "syscall_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "skas.h" - -void handle_syscall(union uml_pt_regs *regs) -{ - long result; -#if UML_CONFIG_SYSCALL_DEBUG - int index; - - index = record_syscall_start(UPT_SYSCALL_NR(regs)); -#endif - - syscall_trace(regs, 0); - result = execute_syscall_skas(regs); - - REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); - - syscall_trace(regs, 1); -#if UML_CONFIG_SYSCALL_DEBUG - record_syscall_end(index, result); -#endif -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index b8c5e71763d1..6e84963dfc29 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -6,6 +6,7 @@ #include "linux/stddef.h" #include "linux/sched.h" +#include "linux/config.h" #include "linux/mm.h" #include "asm/page.h" #include "asm/pgtable.h" @@ -17,41 +18,50 @@ #include "os.h" #include "tlb.h" -static void do_ops(int fd, struct host_vm_op *ops, int last) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret = 0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - map(fd, op->u.mmap.addr, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, - op->u.mmap.fd, op->u.mmap.offset); + ret = map(&mmu->skas.id, op->u.mmap.addr, + op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, + op->u.mmap.x, op->u.mmap.fd, + op->u.mmap.offset, finished, flush); break; case MUNMAP: - unmap(fd, (void *) op->u.munmap.addr, - op->u.munmap.len); + ret = unmap(&mmu->skas.id, + (void *) op->u.munmap.addr, + op->u.munmap.len, finished, flush); break; case MPROTECT: - protect(fd, op->u.mprotect.addr, op->u.mprotect.len, - op->u.mprotect.r, op->u.mprotect.w, - op->u.mprotect.x); + ret = protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x, + finished, flush); break; default: printk("Unknown op type %d in do_ops\n", op->type); break; } } + + return ret; } +extern int proc_mm; + static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - int fd = mm->context.skas.mm_fd; + if(!proc_mm && (end_addr > CONFIG_STUB_START)) + end_addr = CONFIG_STUB_START; - fix_range_common(mm, start_addr, end_addr, 
force, fd, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } void __flush_tlb_one_skas(unsigned long addr) @@ -69,17 +79,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, void flush_tlb_mm_skas(struct mm_struct *mm) { + unsigned long end; + /* Don't bother flushing if this address space is about to be * destroyed. */ if(atomic_read(&mm->mm_users) == 0) return; - fix_range(mm, 0, host_task_size, 0); - flush_tlb_kernel_range_common(start_vm, end_vm); + end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(mm, 0, end, 0); } void force_flush_all_skas(void) { - fix_range(current->mm, 0, host_task_size, 1); + unsigned long end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(current->mm, 0, end, 1); } diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c index 0dee1d95c806..9950a6716fe5 100644 --- a/arch/um/kernel/skas/trap_user.c +++ b/arch/um/kernel/skas/trap_user.c @@ -58,7 +58,6 @@ void user_signal(int sig, union uml_pt_regs *regs, int pid) int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) || (sig == SIGILL) || (sig == SIGTRAP)); - regs->skas.is_user = 1; if (segv) get_skas_faultinfo(pid, &regs->skas.faultinfo); info = &sig_info[sig]; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c new file mode 100644 index 000000000000..1429c131879d --- /dev/null +++ b/arch/um/kernel/syscall.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "kern_util.h" +#include "syscall.h" +#include "os.h" + +struct { + int syscall; + int pid; + long result; + unsigned long long start; + unsigned long long end; +} syscall_record[1024]; + +int record_syscall_start(int syscall) +{ + int max, index; + + max = sizeof(syscall_record)/sizeof(syscall_record[0]); + index = next_syscall_index(max); + + syscall_record[index].syscall = syscall; + syscall_record[index].pid = current_pid(); + syscall_record[index].result =
0xdeadbeef; + syscall_record[index].start = os_usecs(); + return(index); +} + +void record_syscall_end(int index, long result) +{ + syscall_record[index].result = result; + syscall_record[index].end = os_usecs(); +} diff --git a/arch/um/kernel/syscall_user.c b/arch/um/kernel/syscall_user.c deleted file mode 100644 index 01b711e00a85..000000000000 --- a/arch/um/kernel/syscall_user.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdlib.h> -#include <sys/time.h> -#include "kern_util.h" -#include "syscall_user.h" - -struct { - int syscall; - int pid; - long result; - struct timeval start; - struct timeval end; -} syscall_record[1024]; - -int record_syscall_start(int syscall) -{ - int max, index; - - max = sizeof(syscall_record)/sizeof(syscall_record[0]); - index = next_syscall_index(max); - - syscall_record[index].syscall = syscall; - syscall_record[index].pid = current_pid(); - syscall_record[index].result = 0xdeadbeef; - gettimeofday(&syscall_record[index].start, NULL); - return(index); -} - -void record_syscall_end(int index, long result) -{ - syscall_record[index].result = result; - gettimeofday(&syscall_record[index].end, NULL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index f829b309b63c..c40b611e3d93 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -48,6 +48,13 @@ void enable_timer(void) set_interval(ITIMER_VIRTUAL); } +void prepare_timer(void * ptr) +{ + int usec = 1000000/hz(); + *(struct itimerval *)ptr = ((struct itimerval) { { 0, usec }, + { 0, usec }}); +} + void disable_timer(void) { struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index a8b4ef601f59..4e08f7545d63 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -137,7 +137,10 @@ long um_stime(int __user *tptr) void timer_handler(int sig, union uml_pt_regs *regs) { local_irq_disable(); - update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), (regs)->skas.is_user)); + irq_enter(); + update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), + (regs)->skas.is_user)); + irq_exit(); local_irq_enable(); if(current_thread->cpu == 0) timer_irq(regs); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index eda477edfdf5..80ed6188e8a2 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -15,33 +15,144 @@ #include "mem_user.h" #include "os.h" +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x, struct host_vm_op *ops, int *index, + int last_filled, union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + __u64 offset; + struct host_vm_op *last; + int fd, ret = 0; + + fd = phys_mapping(phys, &offset); + if(*index != -1){ + last = &ops[*index]; + if((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.r == r) && (last->u.mmap.w == w) && + (last->u.mmap.x == x) && (last->u.mmap.fd 
== fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)){ + last->u.mmap.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MMAP, + .u = { .mmap = { + .addr = virt, + .len = len, + .r = r, + .w = w, + .x = x, + .fd = fd, + .offset = offset } + } }); + return ret; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_op *ops, int *index, int last_filled, + union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + struct host_vm_op *last; + int ret = 0; + + if(*index != -1){ + last = &ops[*index]; + if((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)){ + last->u.munmap.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MUNMAP, + .u = { .munmap = { + .addr = addr, + .len = len } } }); + return ret; +} + +static int add_mprotect(unsigned long addr, unsigned long len, int r, int w, + int x, struct host_vm_op *ops, int *index, + int last_filled, union mm_context *mmu, void **flush, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) +{ + struct host_vm_op *last; + int ret = 0; + + if(*index != -1){ + last = &ops[*index]; + if((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && + (last->u.mprotect.x == x)){ + last->u.mprotect.len += len; + return 0; + } + } + + if(*index == last_filled){ + ret = (*do_ops)(mmu, ops, last_filled, 0, flush); + *index = -1; + } + + ops[++*index] = ((struct host_vm_op) { .type = MPROTECT, + .u = { .mprotect = { + .addr = addr, + .len = len, + .r = r, + .w = w, + .x = x } } }); + return ret; +} + #define ADD_ROUND(n, inc) (((n) + 
(inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + unsigned long end_addr, int force, + int (*do_ops)(union mm_context *, struct host_vm_op *, + int, int, void **)) { pgd_t *npgd; pud_t *npud; pmd_t *npmd; pte_t *npte; + union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; - struct host_vm_op ops[16]; + struct host_vm_op ops[1]; + void *flush = NULL; int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1; + int ret = 0; if(mm == NULL) return; - for(addr = start_addr; addr < end_addr;){ + ops[0].type = NONE; + for(addr = start_addr; addr < end_addr && !ret;){ npgd = pgd_offset(mm, addr); if(!pgd_present(*npgd)){ end = ADD_ROUND(addr, PGDIR_SIZE); if(end > end_addr) end = end_addr; if(force || pgd_newpage(*npgd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pgd_mkuptodate(*npgd); } addr = end; @@ -54,9 +165,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pud_newpage(*npud)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pud_mkuptodate(*npud); } addr = end; @@ -69,9 +180,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, if(end > end_addr) end = end_addr; if(force || pmd_newpage(*npmd)){ - op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, - do_ops); + ret = add_munmap(addr, end - addr, ops, + &op_index, last_op, mmu, + &flush, do_ops); pmd_mkuptodate(*npmd); } addr = end; @@ -90,24 +201,32 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, } if(force || pte_newpage(*npte)){ if(pte_present(*npte)) - op_index = 
add_mmap(addr, - pte_val(*npte) & PAGE_MASK, - PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, - do_ops); - else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, data, - do_ops); + ret = add_mmap(addr, + pte_val(*npte) & PAGE_MASK, + PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); + else ret = add_munmap(addr, PAGE_SIZE, ops, + &op_index, last_op, mmu, + &flush, do_ops); } else if(pte_newprot(*npte)) - op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, - do_ops); + ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, + &op_index, last_op, mmu, + &flush, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(data, ops, op_index); + + if(!ret) + ret = (*do_ops)(mmu, ops, op_index, 1, &flush); + + /* This is not an else because ret is modified above */ + if(ret) { + printk("fix_range_common: failed, killing current process\n"); + force_sig(SIGKILL, current); + } } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -195,51 +314,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) return(updated); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); -} - -void flush_tlb_all(void) -{ - flush_tlb_mm(current->mm); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); -} - -void flush_tlb_kernel_vm(void) -{ - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); -} - -void __flush_tlb_one(unsigned long addr) -{ - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); -} - -void flush_tlb_mm(struct 
mm_struct *mm) -{ - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -} - -void force_flush_all(void) -{ - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -} - pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { return(pgd_offset(mm, address)); @@ -269,101 +343,48 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) return(pte_offset_map(pmd, addr)); } -int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) { - __u64 offset; - struct host_vm_op *last; - int fd; - - fd = phys_mapping(phys, &offset); - if(index != -1){ - last = &ops[index]; - if((last->type == MMAP) && - (last->u.mmap.addr + last->u.mmap.len == virt) && - (last->u.mmap.r == r) && (last->u.mmap.w == w) && - (last->u.mmap.x == x) && (last->u.mmap.fd == fd) && - (last->u.mmap.offset + last->u.mmap.len == offset)){ - last->u.mmap.len += len; - return(index); - } - } - - if(index == last_filled){ - (*do_ops)(data, ops, last_filled); - index = -1; - } - - ops[++index] = ((struct host_vm_op) { .type = MMAP, - .u = { .mmap = { - .addr = virt, - .len = len, - .r = r, - .w = w, - .x = x, - .fd = fd, - .offset = offset } - } }); - return(index); + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); } -int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) +void flush_tlb_all(void) { - struct host_vm_op *last; - - if(index != -1){ - last = &ops[index]; - if((last->type == MUNMAP) && - (last->u.munmap.addr + last->u.mmap.len == addr)){ - last->u.munmap.len += len; - return(index); - } - } + flush_tlb_mm(current->mm); +} - if(index == last_filled){ - (*do_ops)(data, ops, last_filled); - index = -1; - } +void 
flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); +} - ops[++index] = ((struct host_vm_op) { .type = MUNMAP, - .u = { .munmap = { - .addr = addr, - .len = len } } }); - return(index); +void flush_tlb_kernel_vm(void) +{ + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); } -int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) +void __flush_tlb_one(unsigned long addr) { - struct host_vm_op *last; + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); +} - if(index != -1){ - last = &ops[index]; - if((last->type == MPROTECT) && - (last->u.mprotect.addr + last->u.mprotect.len == addr) && - (last->u.mprotect.r == r) && (last->u.mprotect.w == w) && - (last->u.mprotect.x == x)){ - last->u.mprotect.len += len; - return(index); - } - } +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); +} - if(index == last_filled){ - (*do_ops)(data, ops, last_filled); - index = -1; - } +void flush_tlb_mm(struct mm_struct *mm) +{ + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); +} - ops[++index] = ((struct host_vm_op) { .type = MPROTECT, - .u = { .mprotect = { - .addr = addr, - .len = len, - .r = r, - .w = w, - .x = x } } }); - return(index); +void force_flush_all(void) +{ + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); } + diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index c20aef120598..87cc6fd76ced 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -26,6 +26,7 @@ #include "mem.h" #include "mem_kern.h" +/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). 
*/ int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) { @@ -35,7 +36,6 @@ int handle_page_fault(unsigned long address, unsigned long ip, pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long page; int err = -EFAULT; *code_out = SEGV_MAPERR; @@ -52,17 +52,17 @@ int handle_page_fault(unsigned long address, unsigned long ip, else if(expand_stack(vma, address)) goto out; - good_area: +good_area: *code_out = SEGV_ACCERR; if(is_write && !(vma->vm_flags & VM_WRITE)) goto out; - if(!(vma->vm_flags & (VM_READ | VM_EXEC))) + /* Don't require VM_READ|VM_EXEC for write faults! */ + if(!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) goto out; - page = address & PAGE_MASK; do { - survive: +survive: switch (handle_mm_fault(mm, vma, address, is_write)){ case VM_FAULT_MINOR: current->min_flt++; @@ -79,16 +79,15 @@ int handle_page_fault(unsigned long address, unsigned long ip, default: BUG(); } - pgd = pgd_offset(mm, page); - pud = pud_offset(pgd, page); - pmd = pmd_offset(pud, page); - pte = pte_offset_kernel(pmd, page); + pgd = pgd_offset(mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); } while(!pte_present(*pte)); err = 0; - *pte = pte_mkyoung(*pte); - if(pte_write(*pte)) *pte = pte_mkdirty(*pte); - flush_tlb_page(vma, page); - out: + WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); + flush_tlb_page(vma, address); +out: up_read(&mm->mmap_sem); return(err); @@ -144,19 +143,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", address, ip); - if(err == -EACCES){ + if (err == -EACCES) { si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; si.si_addr = (void *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } - else if(err == -ENOMEM){ + } else if (err == -ENOMEM) { printk("VM: killing process %s\n", 
current->comm); do_exit(SIGKILL); - } - else { + } else { + BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; si.si_addr = (void *) address; current->thread.arch.faultinfo = fi; @@ -200,30 +198,3 @@ void winch(int sig, union uml_pt_regs *regs) void trap_init(void) { } - -DEFINE_SPINLOCK(trap_lock); - -static int trap_index = 0; - -int next_trap_index(int limit) -{ - int ret; - - spin_lock(&trap_lock); - ret = trap_index; - if(++trap_index == limit) - trap_index = 0; - spin_unlock(&trap_lock); - return(ret); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c index f825a6eda3f5..e9ccd6b8d3c7 100644 --- a/arch/um/kernel/trap_user.c +++ b/arch/um/kernel/trap_user.c @@ -40,35 +40,14 @@ void kill_child_dead(int pid) } while(1); } -/* Unlocked - don't care if this is a bit off */ -int nsegfaults = 0; - -struct { - unsigned long address; - int is_write; - int pid; - unsigned long sp; - int is_user; -} segfault_record[1024]; - void segv_handler(int sig, union uml_pt_regs *regs) { - int index, max; struct faultinfo * fi = UPT_FAULTINFO(regs); if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){ bad_segv(*fi, UPT_IP(regs)); return; } - max = sizeof(segfault_record)/sizeof(segfault_record[0]); - index = next_trap_index(max); - - nsegfaults++; - segfault_record[index].address = FAULT_ADDRESS(*fi); - segfault_record[index].pid = os_getpid(); - segfault_record[index].is_write = FAULT_WRITE(*fi); - segfault_record[index].sp = UPT_SP(regs); - segfault_record[index].is_user = UPT_IS_USER(regs); segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); } diff --git a/arch/um/kernel/tt/include/uaccess-tt.h 
b/arch/um/kernel/tt/include/uaccess-tt.h index 3fbb5fe26f49..aa6db384af80 100644 --- a/arch/um/kernel/tt/include/uaccess-tt.h +++ b/arch/um/kernel/tt/include/uaccess-tt.h @@ -33,7 +33,7 @@ extern unsigned long uml_physmem; (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ (under_task_size(addr, size) || is_stack(addr, size)))) -static inline int verify_area_tt(int type, const void * addr, +static inline int verify_area_tt(int type, const void __user * addr, unsigned long size) { return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); @@ -50,12 +50,12 @@ extern int __do_clear_user(void *mem, size_t len, void **fault_addr, extern int __do_strnlen_user(const char *str, unsigned long n, void **fault_addr, void **fault_catcher); -extern int copy_from_user_tt(void *to, const void *from, int n); -extern int copy_to_user_tt(void *to, const void *from, int n); -extern int strncpy_from_user_tt(char *dst, const char *src, int count); -extern int __clear_user_tt(void *mem, int len); -extern int clear_user_tt(void *mem, int len); -extern int strnlen_user_tt(const void *str, int len); +extern int copy_from_user_tt(void *to, const void __user *from, int n); +extern int copy_to_user_tt(void __user *to, const void *from, int n); +extern int strncpy_from_user_tt(char *dst, const char __user *src, int count); +extern int __clear_user_tt(void __user *mem, int len); +extern int clear_user_tt(void __user *mem, int len); +extern int strnlen_user_tt(const void __user *str, int len); #endif diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c index 2650a628719e..3d29c90514cc 100644 --- a/arch/um/kernel/tt/syscall_kern.c +++ b/arch/um/kernel/tt/syscall_kern.c @@ -12,36 +12,41 @@ #include "asm/uaccess.h" #include "asm/stat.h" #include "sysdep/syscalls.h" +#include "sysdep/sigcontext.h" #include "kern_util.h" +#include "syscall.h" -extern syscall_handler_t *sys_call_table[]; - -long execute_syscall_tt(void *r) +void syscall_handler_tt(int sig, 
struct pt_regs *regs) { - struct pt_regs *regs = r; - long res; + void *sc; + long result; int syscall; - #ifdef CONFIG_SYSCALL_DEBUG + int index; + index = record_syscall_start(syscall); +#endif + sc = UPT_SC(&regs->regs); + SC_START_SYSCALL(sc); + + syscall_trace(&regs->regs, 0); + current->thread.nsyscalls++; nsyscalls++; -#endif syscall = UPT_SYSCALL_NR(&regs->regs); if((syscall >= NR_syscalls) || (syscall < 0)) - res = -ENOSYS; - else res = EXECUTE_SYSCALL(syscall, regs); + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); - return(res); -} + /* regs->sc may have changed while the system call ran (there may + * have been an interrupt or segfault), so it needs to be refreshed. + */ + UPT_SC(&regs->regs) = sc; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + SC_SET_SYSCALL_RETURN(sc, result); + + syscall_trace(&regs->regs, 1); +#ifdef CONFIG_SYSCALL_DEBUG + record_syscall_end(index, result); +#endif +} diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c index b218316cfdb2..902987bf379b 100644 --- a/arch/um/kernel/tt/syscall_user.c +++ b/arch/um/kernel/tt/syscall_user.c @@ -13,42 +13,9 @@ #include "task.h" #include "user_util.h" #include "kern_util.h" -#include "syscall_user.h" +#include "syscall.h" #include "tt.h" - -void syscall_handler_tt(int sig, union uml_pt_regs *regs) -{ - void *sc; - long result; - int syscall; -#ifdef UML_CONFIG_DEBUG_SYSCALL - int index; -#endif - - syscall = UPT_SYSCALL_NR(regs); - sc = UPT_SC(regs); - SC_START_SYSCALL(sc); - -#ifdef UML_CONFIG_DEBUG_SYSCALL - index = record_syscall_start(syscall); -#endif - syscall_trace(regs, 0); - result = execute_syscall_tt(regs); - - /* regs->sc 
may have changed while the system call ran (there may - * have been an interrupt or segfault), so it needs to be refreshed. - */ - UPT_SC(regs) = sc; - - SC_SET_SYSCALL_RETURN(sc, result); - - syscall_trace(regs, 1); -#ifdef UML_CONFIG_DEBUG_SYSCALL - record_syscall_end(index, result); -#endif -} - void do_sigtrap(void *task) { UPT_SYSCALL_NR(TASK_REGS(task)) = -1; diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 203216ad86f1..f1d85dbb45b9 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,25 +17,31 @@ #include "os.h" #include "tlb.h" -static void do_ops(int unused, struct host_vm_op *ops, int last) +static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last, + int finished, void **flush) { struct host_vm_op *op; - int i; + int i, ret=0; - for(i = 0; i <= last; i++){ + for(i = 0; i <= last && !ret; i++){ op = &ops[i]; switch(op->type){ case MMAP: - os_map_memory((void *) op->u.mmap.addr, op->u.mmap.fd, - op->u.mmap.offset, op->u.mmap.len, - op->u.mmap.r, op->u.mmap.w, - op->u.mmap.x); + ret = os_map_memory((void *) op->u.mmap.addr, + op->u.mmap.fd, op->u.mmap.offset, + op->u.mmap.len, op->u.mmap.r, + op->u.mmap.w, op->u.mmap.x); break; case MUNMAP: - os_unmap_memory((void *) op->u.munmap.addr, - op->u.munmap.len); + ret = os_unmap_memory((void *) op->u.munmap.addr, + op->u.munmap.len); break; case MPROTECT: + ret = protect_memory(op->u.mprotect.addr, + op->u.munmap.len, + op->u.mprotect.r, + op->u.mprotect.w, + op->u.mprotect.x, 1); protect_memory(op->u.mprotect.addr, op->u.munmap.len, op->u.mprotect.r, op->u.mprotect.w, op->u.mprotect.x, 1); @@ -45,6 +51,8 @@ static void do_ops(int unused, struct host_vm_op *ops, int last) break; } } + + return ret; } static void fix_range(struct mm_struct *mm, unsigned long start_addr, @@ -55,7 +63,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr, panic("fix_range fixing wrong address space, current = 0x%p", current); - fix_range_common(mm, 
start_addr, end_addr, force, 0, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } atomic_t vmchange_seq = ATOMIC_INIT(1); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8736d098f0ee..09f6f7ce4695 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -38,6 +38,9 @@ #include "choose-mode.h" #include "mode_kern.h" #include "mode.h" +#ifdef UML_CONFIG_MODE_SKAS +#include "skas.h" +#endif #define DEFAULT_COMMAND_LINE "root=98:0" @@ -123,7 +126,7 @@ unsigned long start_vm; unsigned long end_vm; int ncpus = 1; -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST /* Pointer set in linux_main, the array itself is private to each thread, * and changed at address space creation time so this poses no concurrency * problems. @@ -138,7 +141,7 @@ long physmem_size = 32 * 1024 * 1024; void set_cmdline(char *cmd) { -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST char *umid, *ptr; if(CHOOSE_MODE(honeypot, 0)) return; @@ -318,6 +321,7 @@ int linux_main(int argc, char **argv) unsigned long avail, diff; unsigned long virtmem_size, max_physmem; unsigned int i, add; + char * mode; for (i = 1; i < argc; i++){ if((i == 1) && (argv[i][0] == ' ')) continue; @@ -329,6 +333,7 @@ int linux_main(int argc, char **argv) if(have_root == 0) add_arg(DEFAULT_COMMAND_LINE); + os_early_checks(); mode_tt = force_tt ? 
1 : !can_do_skas(); #ifndef CONFIG_MODE_TT if (mode_tt) { @@ -338,6 +343,21 @@ int linux_main(int argc, char **argv) exit(1); } #endif + +#ifndef CONFIG_MODE_SKAS + mode = "TT"; +#else + /* Show to the user the result of selection */ + if (mode_tt) + mode = "TT"; + else if (proc_mm && ptrace_faultinfo) + mode = "SKAS3"; + else + mode = "SKAS0"; +#endif + + printf("UML running in %s mode\n", mode); + uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, &host_task_size, &task_size); @@ -366,7 +386,7 @@ int linux_main(int argc, char **argv) setup_machinename(system_utsname.machine); -#ifdef CONFIG_MODE_TT +#ifdef CONFIG_CMDLINE_ON_HOST argv1_begin = argv[1]; argv1_end = &argv[1][strlen(argv[1])]; #endif @@ -451,7 +471,6 @@ void __init setup_arch(char **cmdline_p) void __init check_bugs(void) { arch_check_bugs(); - check_ptrace(); check_sigio(); check_devanon(); } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 61dfd4fef752..af11915ce0a8 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -16,8 +16,8 @@ SECTIONS __binary_start = .; #ifdef MODE_TT - .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) } - .remap : { arch/um/sys-SUBARCH/unmap_fin.o (.text) } + .remap_data : { UNMAP_PATH (.data .bss) } + .remap : { UNMAP_PATH (.text) } . = ALIGN(4096); /* Init code and data */ #endif @@ -30,6 +30,7 @@ SECTIONS _einittext = .; } . = ALIGN(4096); + .text : { *(.text) @@ -39,6 +40,12 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } #include "asm/common.lds.S" @@ -86,14 +93,10 @@ SECTIONS *(.bss) *(COMMON) } - _end = . ; + _end = .; PROVIDE (end = .); - /* Stabs debugging sections. 
*/ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } + + STABS_DEBUG + + DWARF_DEBUG } |
