summary | refs | log | tree | commit | diff
path: root/arch/um
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um')
-rw-r--r-- arch/um/Kconfig 20
-rw-r--r-- arch/um/Kconfig.char (renamed from arch/um/Kconfig_char) 0
-rw-r--r-- arch/um/Kconfig.debug 11
-rw-r--r-- arch/um/Kconfig.i386 (renamed from arch/um/Kconfig_i386) 16
-rw-r--r-- arch/um/Kconfig.net (renamed from arch/um/Kconfig_net) 14
-rw-r--r-- arch/um/Kconfig.scsi (renamed from arch/um/Kconfig_scsi) 0
-rw-r--r-- arch/um/Kconfig.x86_64 (renamed from arch/um/Kconfig_x86_64) 20
-rw-r--r-- arch/um/Makefile 63
-rw-r--r-- arch/um/Makefile-i386 30
-rw-r--r-- arch/um/Makefile-x86_64 7
-rw-r--r-- arch/um/defconfig 58
-rw-r--r-- arch/um/drivers/Makefile 21
-rw-r--r-- arch/um/drivers/chan_user.c 11
-rw-r--r-- arch/um/drivers/cow.h 4
-rw-r--r-- arch/um/drivers/hostaudio_kern.c 4
-rw-r--r-- arch/um/drivers/line.c 37
-rw-r--r-- arch/um/drivers/mconsole_kern.c 2
-rw-r--r-- arch/um/drivers/mmapper_kern.c 41
-rw-r--r-- arch/um/drivers/ubd_kern.c 570
-rw-r--r-- arch/um/include/aio.h 40
-rw-r--r-- arch/um/include/common-offsets.h 1
-rw-r--r-- arch/um/include/init.h 10
-rw-r--r-- arch/um/include/irq_kern.h 3
-rw-r--r-- arch/um/include/mem.h 11
-rw-r--r-- arch/um/include/os.h 15
-rw-r--r-- arch/um/include/registers.h 1
-rw-r--r-- arch/um/include/syscall.h 12
-rw-r--r-- arch/um/include/syscall_user.h 23
-rw-r--r-- arch/um/include/sysdep-i386/ptrace_user.h 13
-rw-r--r-- arch/um/include/sysdep-i386/stub.h 65
-rw-r--r-- arch/um/include/sysdep-i386/syscalls.h 2
-rw-r--r-- arch/um/include/sysdep-x86_64/ptrace.h 2
-rw-r--r-- arch/um/include/sysdep-x86_64/ptrace_user.h 14
-rw-r--r-- arch/um/include/sysdep-x86_64/stub.h 58
-rw-r--r-- arch/um/include/sysdep-x86_64/syscalls.h 2
-rw-r--r-- arch/um/include/time_user.h 1
-rw-r--r-- arch/um/include/tlb.h 30
-rw-r--r-- arch/um/include/um_uaccess.h 7
-rw-r--r-- arch/um/include/user_util.h 8
-rw-r--r-- arch/um/kernel/Makefile 15
-rw-r--r-- arch/um/kernel/asm-offsets.c 1
-rw-r--r-- arch/um/kernel/dyn.lds.S 43
-rw-r--r-- arch/um/kernel/exitcode.c 2
-rw-r--r-- arch/um/kernel/helper.c 14
-rw-r--r-- arch/um/kernel/irq.c 41
-rw-r--r-- arch/um/kernel/ksyms.c 24
-rw-r--r-- arch/um/kernel/main.c 2
-rw-r--r-- arch/um/kernel/mem.c 2
-rw-r--r-- arch/um/kernel/physmem.c 43
-rw-r--r-- arch/um/kernel/process_kern.c 2
-rw-r--r-- arch/um/kernel/reboot.c 6
-rw-r--r-- arch/um/kernel/signal_kern.c 6
-rw-r--r-- arch/um/kernel/skas/Makefile 9
-rw-r--r-- arch/um/kernel/skas/clone.c 44
-rw-r--r-- arch/um/kernel/skas/exec_kern.c 2
-rw-r--r-- arch/um/kernel/skas/include/mm_id.h 17
-rw-r--r-- arch/um/kernel/skas/include/mmu-skas.h 11
-rw-r--r-- arch/um/kernel/skas/include/skas.h 38
-rw-r--r-- arch/um/kernel/skas/include/stub-data.h 18
-rw-r--r-- arch/um/kernel/skas/include/uaccess-skas.h 14
-rw-r--r-- arch/um/kernel/skas/mem.c 6
-rw-r--r-- arch/um/kernel/skas/mem_user.c 315
-rw-r--r-- arch/um/kernel/skas/mmu.c 152
-rw-r--r-- arch/um/kernel/skas/process.c 273
-rw-r--r-- arch/um/kernel/skas/process_kern.c 40
-rw-r--r-- arch/um/kernel/skas/syscall.c 50
-rw-r--r-- arch/um/kernel/skas/syscall_kern.c 43
-rw-r--r-- arch/um/kernel/skas/syscall_user.c 44
-rw-r--r-- arch/um/kernel/skas/tlb.c 45
-rw-r--r-- arch/um/kernel/skas/trap_user.c 1
-rw-r--r-- arch/um/kernel/syscall.c 36
-rw-r--r-- arch/um/kernel/syscall_user.c 48
-rw-r--r-- arch/um/kernel/time.c 7
-rw-r--r-- arch/um/kernel/time_kern.c 5
-rw-r--r-- arch/um/kernel/tlb.c 331
-rw-r--r-- arch/um/kernel/trap_kern.c 61
-rw-r--r-- arch/um/kernel/trap_user.c 21
-rw-r--r-- arch/um/kernel/tt/include/uaccess-tt.h 14
-rw-r--r-- arch/um/kernel/tt/syscall_kern.c 47
-rw-r--r-- arch/um/kernel/tt/syscall_user.c 35
-rw-r--r-- arch/um/kernel/tt/tlb.c 28
-rw-r--r-- arch/um/kernel/um_arch.c 27
-rw-r--r-- arch/um/kernel/uml.lds.S 25
-rw-r--r-- arch/um/os-Linux/Makefile 14
-rw-r--r-- arch/um/os-Linux/aio.c 414
-rw-r--r-- arch/um/os-Linux/elf_aux.c 7
-rw-r--r-- arch/um/os-Linux/process.c 58
-rw-r--r-- arch/um/os-Linux/start_up.c (renamed from arch/um/kernel/process.c) 295
-rw-r--r-- arch/um/os-Linux/sys-i386/registers.c 5
-rw-r--r-- arch/um/os-Linux/sys-x86_64/registers.c 5
-rw-r--r-- arch/um/os-Linux/tt.c 113
-rw-r--r-- arch/um/os-Linux/user_syms.c 3
-rw-r--r-- arch/um/scripts/Makefile.rules 10
-rw-r--r-- arch/um/scripts/Makefile.unmap 4
-rw-r--r-- arch/um/sys-i386/Makefile 6
-rw-r--r-- arch/um/sys-i386/kernel-offsets.c 1
-rw-r--r-- arch/um/sys-i386/ldt.c 114
-rw-r--r-- arch/um/sys-i386/signal.c 2
-rw-r--r-- arch/um/sys-i386/stub.S 51
-rw-r--r-- arch/um/sys-i386/stub_segv.c 29
-rw-r--r-- arch/um/sys-i386/unmap.c 2
-rw-r--r-- arch/um/sys-x86_64/Makefile 9
-rw-r--r-- arch/um/sys-x86_64/kernel-offsets.c 1
-rw-r--r-- arch/um/sys-x86_64/signal.c 43
-rw-r--r-- arch/um/sys-x86_64/stub.S 66
-rw-r--r-- arch/um/sys-x86_64/stub_segv.c 32
-rw-r--r-- arch/um/sys-x86_64/unmap.c 2
107 files changed, 3031 insertions, 1510 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 9469e77303e6..684e1f8b2755 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -73,7 +73,7 @@ config MODE_SKAS
to CONFIG_MODE_TT). Otherwise, it is safe to say Y. Disabling this
option will shrink the UML binary slightly.
-source "arch/um/Kconfig_arch"
+source "arch/um/Kconfig.arch"
source "mm/Kconfig"
config LD_SCRIPT_STATIC
@@ -128,7 +128,6 @@ config HOSTFS
config HPPFS
tristate "HoneyPot ProcFS (EXPERIMENTAL)"
- depends on BROKEN
help
hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
entries to be overridden, removed, or fabricated from the host.
@@ -141,8 +140,9 @@ config HPPFS
You only need this if you are setting up a UML honeypot. Otherwise,
it is safe to say 'N' here.
- If you are actively using it, please ask for it to be fixed. In this
- moment, it does not work on 2.6 (it works somehow on 2.4).
+ If you are actively using it, please report any problems, since it is
+ being fixed. At the moment, it is experimental on 2.6 (it works on
+ 2.4).
config MCONSOLE
bool "Management console"
@@ -196,7 +196,7 @@ config HOST_2G_2G
config SMP
bool "Symmetric multi-processing support (EXPERIMENTAL)"
default n
- depends on MODE_TT && EXPERIMENTAL
+ depends on (MODE_TT && EXPERIMENTAL && !SMP_BROKEN) || (BROKEN && SMP_BROKEN)
help
This option enables UML SMP support.
It is NOT related to having a real SMP box. Not directly, at least.
@@ -275,9 +275,11 @@ endmenu
source "init/Kconfig"
+source "net/Kconfig"
+
source "drivers/base/Kconfig"
-source "arch/um/Kconfig_char"
+source "arch/um/Kconfig.char"
source "drivers/block/Kconfig"
@@ -285,9 +287,9 @@ config NETDEVICES
bool
default NET
-source "arch/um/Kconfig_net"
+source "arch/um/Kconfig.net"
-source "net/Kconfig"
+source "drivers/net/Kconfig"
source "fs/Kconfig"
@@ -309,7 +311,7 @@ config GENERIC_ISA_DMA
depends on SCSI
default y
-source "arch/um/Kconfig_scsi"
+source "arch/um/Kconfig.scsi"
endmenu
diff --git a/arch/um/Kconfig_char b/arch/um/Kconfig.char
index 62d87b71179b..62d87b71179b 100644
--- a/arch/um/Kconfig_char
+++ b/arch/um/Kconfig.char
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index bd41e4286d0d..5681a8bd370b 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -2,6 +2,17 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
+config CMDLINE_ON_HOST
+ bool "Show command line arguments on the host in TT mode"
+ depends on MODE_TT
+ default !DEBUG_INFO
+ help
+ This controls whether arguments in guest processes should be shown on
+ the host's ps output.
+ Enabling this option hinders debugging on some recent GDB versions
+ (because GDB gets "confused" when we do an execvp()). So probably you
+ should disable it.
+
config PT_PROXY
bool "Enable ptrace proxy"
depends on XTERM_CHAN && DEBUG_INFO && MODE_TT
diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig.i386
index e41f3748d30f..8ad156a00499 100644
--- a/arch/um/Kconfig_i386
+++ b/arch/um/Kconfig.i386
@@ -6,6 +6,10 @@ config 64BIT
bool
default n
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config TOP_ADDR
hex
default 0xc0000000 if !HOST_2G_2G
@@ -19,6 +23,18 @@ config 3_LEVEL_PGTABLES
memory. All the memory that can't be mapped directly will be treated
as high memory.
+config STUB_CODE
+ hex
+ default 0xbfffe000
+
+config STUB_DATA
+ hex
+ default 0xbffff000
+
+config STUB_START
+ hex
+ default STUB_CODE
+
config ARCH_HAS_SC_SIGNALS
bool
default y
diff --git a/arch/um/Kconfig_net b/arch/um/Kconfig.net
index 1c2f9a70d91d..14a04ebdeae9 100644
--- a/arch/um/Kconfig_net
+++ b/arch/um/Kconfig.net
@@ -34,7 +34,7 @@ config UML_NET_ETHERTAP
link with the host.
To use this, your host kernel must have support for Ethertap
- devices. Also, if your host kernel is 2.4.x, it must have
+ devices. Also, if your host kernel is 2.4.x, it must have
CONFIG_NETLINK_DEV configured as Y or M.
For more information, see
@@ -43,7 +43,7 @@ config UML_NET_ETHERTAP
networking.
If you'd like to set up an IP network with the host and/or the
- outside world, say Y to this, the Daemon Transport and/or the
+ outside world, say Y to this, the Daemon Transport and/or the
Slip Transport. You'll need at least one of them, but may choose
more than one without conflict. If you don't need UML networking,
say N.
@@ -78,7 +78,7 @@ config UML_NET_SLIP
The Ethertap Transport is preferred over slip because of its
limitations. If you prefer slip, however, say Y here. Otherwise
- choose the Multicast transport (to network multiple UMLs on
+ choose the Multicast transport (to network multiple UMLs on
multiple hosts), Ethertap (to network with the host and the
outside world), and/or the Daemon transport (to network multiple
UMLs on a single host). You may choose more than one without
@@ -135,10 +135,10 @@ config UML_NET_MCAST
config UML_NET_PCAP
bool "pcap transport"
- depends on UML_NET && BROKEN
+ depends on UML_NET && EXPERIMENTAL
help
The pcap transport makes a pcap packet stream on the host look
- like an ethernet device inside UML. This is useful for making
+ like an ethernet device inside UML. This is useful for making
UML act as a network monitor for the host. You must have libcap
installed in order to build the pcap transport into UML.
@@ -169,11 +169,11 @@ config UML_NET_SLIRP
setup string. The effect of this transport on the UML is similar
that of a host behind a firewall that masquerades all network
connections passing through it (but is less secure).
-
+
To use this you should first have slirp compiled somewhere
accessible on the host, and have read its documentation. If you
don't need UML networking, say N.
-
+
Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
endmenu
diff --git a/arch/um/Kconfig_scsi b/arch/um/Kconfig.scsi
index c291c942b1a8..c291c942b1a8 100644
--- a/arch/um/Kconfig_scsi
+++ b/arch/um/Kconfig.scsi
diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig.x86_64
index f162f50f0b17..bd35e59419c8 100644
--- a/arch/um/Kconfig_x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -6,6 +6,10 @@ config 64BIT
bool
default y
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config TOP_ADDR
hex
default 0x80000000
@@ -14,6 +18,18 @@ config 3_LEVEL_PGTABLES
bool
default y
+config STUB_CODE
+ hex
+ default 0x7fbfffe000
+
+config STUB_DATA
+ hex
+ default 0x7fbffff000
+
+config STUB_START
+ hex
+ default STUB_CODE
+
config ARCH_HAS_SC_SIGNALS
bool
default n
@@ -21,3 +37,7 @@ config ARCH_HAS_SC_SIGNALS
config ARCH_REUSE_HOST_VSYSCALL_AREA
bool
default n
+
+config SMP_BROKEN
+ bool
+ default y
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 3f073902351f..ce987266dac6 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -51,25 +51,27 @@ MRPROPER_DIRS += $(ARCH_DIR)/include2
endif
SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH)
-include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
+# -Dvmap=kernel_vmap affects everything, and prevents anything from
+# referencing the libpcap.o symbol so named.
-core-y += $(SUBARCH_CORE)
-libs-y += $(SUBARCH_LIBS)
+CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
+ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap
+AFLAGS += $(ARCH_INCLUDE)
+
+USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
+USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
+ $(MODE_INCLUDE)
# -Derrno=kernel_errno - This turns all kernel references to errno into
# kernel_errno to separate them from the libc errno. This allows -fno-common
# in CFLAGS. Otherwise, it would cause ld to complain about the two different
# errnos.
-CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
- $(ARCH_INCLUDE) $(MODE_INCLUDE)
-
-USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
-USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
- $(MODE_INCLUDE) $(ARCH_USER_CFLAGS)
CFLAGS += -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask
CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
+include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH)
+
#This will adjust *FLAGS accordingly to the platform.
include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
@@ -100,13 +102,12 @@ define archhelp
endef
ifneq ($(KBUILD_SRC),)
-$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig_$(SUBARCH) $(ARCH_DIR)/Kconfig_arch)
-CLEAN_FILES += $(ARCH_DIR)/Kconfig_arch
+$(shell mkdir -p $(ARCH_DIR) && ln -fsn $(srctree)/$(ARCH_DIR)/Kconfig.$(SUBARCH) $(ARCH_DIR)/Kconfig.arch)
else
-$(shell cd $(ARCH_DIR) && ln -sf Kconfig_$(SUBARCH) Kconfig_arch)
+$(shell cd $(ARCH_DIR) && ln -sf Kconfig.$(SUBARCH) Kconfig.arch)
endif
-prepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS)
+archprepare: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS)
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
@@ -116,18 +117,19 @@ CONFIG_KERNEL_STACK_ORDER ?= 2
STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
ifndef START
- START = $$(($(TOP_ADDR) - $(SIZE)))
+ START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
endif
-CPPFLAGS_vmlinux.lds = $(shell echo -U$(SUBARCH) \
+CPPFLAGS_vmlinux.lds = -U$(SUBARCH) \
-DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
- -DELF_FORMAT=\"$(ELF_FORMAT)\" $(CPP_MODE-y) \
- -DKERNEL_STACK_SIZE=$(STACK_SIZE) -DSUBARCH=$(SUBARCH))
+ -DELF_FORMAT="$(ELF_FORMAT)" $(CPP_MODE-y) \
+ -DKERNEL_STACK_SIZE=$(STACK_SIZE) \
+ -DUNMAP_PATH=arch/um/sys-$(SUBARCH)/unmap_fin.o
#The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
-CFLAGS_vmlinux = $(LINK-y) $(LINK_WRAPS)
+CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS)
define cmd_vmlinux__
$(CC) $(CFLAGS_vmlinux) -o $@ \
-Wl,-T,$(vmlinux-lds) $(vmlinux-init) \
@@ -140,14 +142,15 @@ endef
#When cleaning we don't include .config, so we don't include
#TT or skas makefiles and don't clean skas_ptregs.h.
CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/include/uml-config.h \
- $(GEN_HEADERS) $(ARCH_DIR)/include/skas_ptregs.h
+ $(GEN_HEADERS) $(ARCH_DIR)/include/skas_ptregs.h \
+ $(ARCH_DIR)/include/user_constants.h $(ARCH_DIR)/Kconfig.arch
MRPROPER_FILES += $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) \
- $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) $(ARCH_DIR)/os \
- $(ARCH_DIR)/Kconfig_arch
+ $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) $(ARCH_DIR)/os
archclean:
$(Q)$(MAKE) $(clean)=$(ARCH_DIR)/util
+ $(Q)$(MAKE) $(clean)=$(ARCH_DIR)/os-$(OS)/util
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
-o -name '*.gcov' \) -type f -print | xargs rm -f
@@ -193,6 +196,22 @@ define filechk_umlconfig
sed 's/ CONFIG/ UML_CONFIG/'
endef
+define filechk_gen-asm-offsets
+ (set -e; \
+ echo "#ifndef __ASM_OFFSETS_H__"; \
+ echo "#define __ASM_OFFSETS_H__"; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was generated by arch/$(ARCH)/Makefile"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \
+ echo ""; \
+ echo "#endif" )
+endef
+
$(ARCH_DIR)/include/uml-config.h : include/linux/autoconf.h
$(call filechk,umlconfig)
@@ -242,7 +261,7 @@ $(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h $(ARCH_DIR)/kernel-offsets.h FOR
$(ARCH_DIR)/kernel/skas/util: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE
$(Q)$(MAKE) $(build)=$@
-$(ARCH_DIR)/os-$(OS)/util: scripts_basic FORCE
+$(ARCH_DIR)/os-$(OS)/util: scripts_basic $(ARCH_DIR)/user-offsets.h FORCE
$(Q)$(MAKE) $(build)=$@
export SUBARCH USER_CFLAGS OS
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
index 29e182d5a83a..1ab431a53ac3 100644
--- a/arch/um/Makefile-i386
+++ b/arch/um/Makefile-i386
@@ -1,4 +1,4 @@
-SUBARCH_CORE := arch/um/sys-i386/ arch/i386/crypto/
+core-y += arch/um/sys-i386/ arch/i386/crypto/
TOP_ADDR := $(CONFIG_TOP_ADDR)
@@ -8,21 +8,33 @@ ifeq ($(CONFIG_MODE_SKAS),y)
endif
endif
+LDFLAGS += -m elf_i386
+ELF_ARCH := $(SUBARCH)
+ELF_FORMAT := elf32-$(SUBARCH)
+OBJCOPYFLAGS := -O binary -R .note -R .comment -S
+
+ifeq ("$(origin SUBARCH)", "command line")
+ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
+CFLAGS += $(call cc-option,-m32)
+USER_CFLAGS += $(call cc-option,-m32)
+HOSTCFLAGS += $(call cc-option,-m32)
+HOSTLDFLAGS += $(call cc-option,-m32)
+AFLAGS += $(call cc-option,-m32)
+LINK-y += $(call cc-option,-m32)
+UML_OBJCOPYFLAGS += -F $(ELF_FORMAT)
+
+export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS
+endif
+endif
+
CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH)
-ARCH_USER_CFLAGS :=
ifneq ($(CONFIG_GPROF),y)
ARCH_CFLAGS += -DUM_FASTCALL
endif
-ELF_ARCH := $(SUBARCH)
-ELF_FORMAT := elf32-$(SUBARCH)
-
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
-
SYS_UTIL_DIR := $(ARCH_DIR)/sys-i386/util
-
-SYS_HEADERS := $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
+SYS_HEADERS := $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
prepare: $(SYS_HEADERS)
diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64
index 32144562c279..436abbba409b 100644
--- a/arch/um/Makefile-x86_64
+++ b/arch/um/Makefile-x86_64
@@ -1,11 +1,14 @@
# Copyright 2003 - 2004 Pathscale, Inc
# Released under the GPL
-SUBARCH_LIBS := arch/um/sys-x86_64/
+libs-y += arch/um/sys-x86_64/
START := 0x60000000
+#We #undef __x86_64__ for kernelspace, not for userspace where
+#it's needed for headers to work!
CFLAGS += -U__$(SUBARCH)__ -fno-builtin
-ARCH_USER_CFLAGS := -D__x86_64__
+USER_CFLAGS += -fno-builtin
+CHECKFLAGS += -m64
ELF_ARCH := i386:x86-64
ELF_FORMAT := elf64-x86-64
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 4067c3aa5b60..80d30d19d750 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc3-skas3-v9-pre2
-# Sun Apr 24 19:46:10 2005
+# Linux kernel version: 2.6.12-rc6-mm1
+# Tue Jun 14 18:22:21 2005
#
CONFIG_GENERIC_HARDIRQS=y
CONFIG_UML=y
@@ -13,23 +13,32 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
#
# UML-specific options
#
-CONFIG_MODE_TT=y
+# CONFIG_MODE_TT is not set
+# CONFIG_STATIC_LINK is not set
CONFIG_MODE_SKAS=y
CONFIG_UML_X86=y
# CONFIG_64BIT is not set
CONFIG_TOP_ADDR=0xc0000000
# CONFIG_3_LEVEL_PGTABLES is not set
+CONFIG_STUB_CODE=0xbfffe000
+CONFIG_STUB_DATA=0xbffff000
+CONFIG_STUB_START=0xbfffe000
CONFIG_ARCH_HAS_SC_SIGNALS=y
CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
-CONFIG_LD_SCRIPT_STATIC=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_LD_SCRIPT_DYN=y
CONFIG_NET=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
-CONFIG_HOSTFS=y
+# CONFIG_HOSTFS is not set
CONFIG_MCONSOLE=y
# CONFIG_MAGIC_SYSRQ is not set
# CONFIG_HOST_2G_2G is not set
-# CONFIG_SMP is not set
CONFIG_NEST_LEVEL=0
CONFIG_KERNEL_HALF_GIGS=1
# CONFIG_HIGHMEM is not set
@@ -63,6 +72,8 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
@@ -81,6 +92,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
@@ -115,6 +127,7 @@ CONFIG_UML_SOUND=m
CONFIG_SOUND=m
CONFIG_HOSTAUDIO=m
CONFIG_UML_RANDOM=y
+# CONFIG_MMAPPER is not set
#
# Block devices
@@ -176,6 +189,17 @@ CONFIG_INET=y
# CONFIG_INET_TUNNEL is not set
CONFIG_IP_TCPDIAG=y
# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# TCP congestion control
+#
+CONFIG_TCP_CONG_BIC=y
+CONFIG_TCP_CONG_WESTWOOD=y
+CONFIG_TCP_CONG_HTCP=y
+# CONFIG_TCP_CONG_HSTCP is not set
+# CONFIG_TCP_CONG_HYBLA is not set
+# CONFIG_TCP_CONG_VEGAS is not set
+# CONFIG_TCP_CONG_SCALABLE is not set
# CONFIG_IPV6 is not set
# CONFIG_NETFILTER is not set
@@ -206,11 +230,15 @@ CONFIG_IP_TCPDIAG=y
# Network testing
#
# CONFIG_NET_PKTGEN is not set
+# CONFIG_KGDBOE is not set
# CONFIG_NETPOLL is not set
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
# CONFIG_NET_POLL_CONTROLLER is not set
# CONFIG_HAMRADIO is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
CONFIG_DUMMY=m
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
@@ -227,6 +255,7 @@ CONFIG_PPP=m
# CONFIG_PPP_SYNC_TTY is not set
# CONFIG_PPP_DEFLATE is not set
# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
# CONFIG_PPPOE is not set
CONFIG_SLIP=m
# CONFIG_SLIP_COMPRESSED is not set
@@ -240,10 +269,12 @@ CONFIG_SLIP=m
#
CONFIG_EXT2_FS=y
# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
CONFIG_JBD=y
# CONFIG_JBD_DEBUG is not set
+# CONFIG_REISER4_FS is not set
CONFIG_REISERFS_FS=y
# CONFIG_REISERFS_CHECK is not set
# CONFIG_REISERFS_PROC_INFO is not set
@@ -256,6 +287,7 @@ CONFIG_REISERFS_FS=y
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
CONFIG_QUOTA=y
# CONFIG_QFMT_V1 is not set
# CONFIG_QFMT_V2 is not set
@@ -265,6 +297,12 @@ CONFIG_AUTOFS_FS=m
CONFIG_AUTOFS4_FS=m
#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+# CONFIG_FUSE_FS is not set
+
+#
# CD-ROM/DVD Filesystems
#
CONFIG_ISO9660_FS=m
@@ -291,6 +329,8 @@ CONFIG_TMPFS=y
# CONFIG_TMPFS_XATTR is not set
# CONFIG_HUGETLB_PAGE is not set
CONFIG_RAMFS=y
+# CONFIG_CONFIGFS_FS is not set
+# CONFIG_RELAYFS_FS is not set
#
# Miscellaneous filesystems
@@ -319,6 +359,7 @@ CONFIG_RAMFS=y
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
#
# Partition Types
@@ -404,14 +445,15 @@ CONFIG_CRC32=m
# CONFIG_PRINTK_TIME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_LOG_BUF_SHIFT=14
+CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_SLAB=y
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_FS is not set
CONFIG_FRAME_POINTER=y
-CONFIG_PT_PROXY=y
+# CONFIG_GPROF is not set
# CONFIG_GCOV is not set
# CONFIG_SYSCALL_DEBUG is not set
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index b2de9916c32c..783e18cae090 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -10,14 +10,26 @@ slip-objs := slip_kern.o slip_user.o
slirp-objs := slirp_kern.o slirp_user.o
daemon-objs := daemon_kern.o daemon_user.o
mcast-objs := mcast_kern.o mcast_user.o
-#pcap-objs := pcap_kern.o pcap_user.o $(PCAP)
net-objs := net_kern.o net_user.o
mconsole-objs := mconsole_kern.o mconsole_user.o
hostaudio-objs := hostaudio_kern.o
-ubd-objs := ubd_kern.o ubd_user.o
+ubd-objs := ubd_kern.o
port-objs := port_kern.o port_user.o
harddog-objs := harddog_kern.o harddog_user.o
+LDFLAGS_pcap.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libpcap.a)
+
+targets := pcap_kern.o pcap_user.o
+
+$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
+ $(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o)
+#XXX: The call below does not work because the flags are added before the
+# object name, so nothing from the library gets linked.
+#$(call if_changed,ld)
+
+# When the above is fixed, don't forget to add this too!
+#targets += $(obj)/pcap.o
+
obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
obj-$(CONFIG_SSL) += ssl.o
obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
@@ -26,7 +38,7 @@ obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_MCAST) += mcast.o
-#obj-$(CONFIG_UML_NET_PCAP) += pcap.o $(PCAP)
+obj-$(CONFIG_UML_NET_PCAP) += pcap.o
obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
@@ -41,6 +53,7 @@ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o
+# pcap_user.o must be added explicitly.
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o
include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 5d3768156c92..de3bce71aeb3 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -63,7 +63,7 @@ error:
*
* SIGWINCH can't be received synchronously, so you have to set up to receive it
* as a signal. That being the case, if you are going to wait for it, it is
- * convenient to sit in a pause() and wait for the signal to bounce you out of
+ * convenient to sit in sigsuspend() and wait for the signal to bounce you out of
* it (see below for how we make sure to exit only on SIGWINCH).
*/
@@ -94,18 +94,19 @@ static int winch_thread(void *arg)
"byte, err = %d\n", -count);
/* We are not using SIG_IGN on purpose, so don't fix it as I thought to
- * do! If using SIG_IGN, the pause() call below would not stop on
+ * do! If using SIG_IGN, the sigsuspend() call below would not stop on
* SIGWINCH. */
signal(SIGWINCH, winch_handler);
sigfillset(&sigs);
- sigdelset(&sigs, SIGWINCH);
- /* Block anything else than SIGWINCH. */
+ /* Block all signals possible. */
if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){
printk("winch_thread : sigprocmask failed, errno = %d\n",
errno);
exit(1);
}
+ /* In sigsuspend(), block everything other than SIGWINCH. */
+ sigdelset(&sigs, SIGWINCH);
if(setsid() < 0){
printk("winch_thread : setsid failed, errno = %d\n", errno);
@@ -130,7 +131,7 @@ static int winch_thread(void *arg)
while(1){
/* This will be interrupted by SIGWINCH only, since other signals
* are blocked.*/
- pause();
+ sigsuspend(&sigs);
count = os_write_file(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
index 4fcbe8b1b77e..4fcf3a8d13f4 100644
--- a/arch/um/drivers/cow.h
+++ b/arch/um/drivers/cow.h
@@ -3,10 +3,10 @@
#include <asm/types.h>
-#if __BYTE_ORDER == __BIG_ENDIAN
+#if defined(__BIG_ENDIAN)
# define ntohll(x) (x)
# define htonll(x) (x)
-#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#elif defined(__LITTLE_ENDIAN)
# define ntohll(x) bswap_64(x)
# define htonll(x) bswap_64(x)
#else
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index d5742783e19d..59602b81b240 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -57,10 +57,10 @@ __uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP);
#else /*MODULE*/
-MODULE_PARM(dsp, "s");
+module_param(dsp, charp, 0644);
MODULE_PARM_DESC(dsp, DSP_HELP);
-MODULE_PARM(mixer, "s");
+module_param(mixer, charp, 0644);
MODULE_PARM_DESC(mixer, MIXER_HELP);
#endif
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 2bb4c4f5dec4..e0fdffa2d542 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -663,11 +663,15 @@ struct tty_driver *line_register_devfs(struct lines *set,
return driver;
}
+static spinlock_t winch_handler_lock;
+LIST_HEAD(winch_handlers);
+
void lines_init(struct line *lines, int nlines)
{
struct line *line;
int i;
+ spin_lock_init(&winch_handler_lock);
for(i = 0; i < nlines; i++){
line = &lines[i];
INIT_LIST_HEAD(&line->chan_list);
@@ -724,31 +728,30 @@ irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused)
return IRQ_HANDLED;
}
-DECLARE_MUTEX(winch_handler_sem);
-LIST_HEAD(winch_handlers);
-
void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty)
{
struct winch *winch;
- down(&winch_handler_sem);
winch = kmalloc(sizeof(*winch), GFP_KERNEL);
if (winch == NULL) {
printk("register_winch_irq - kmalloc failed\n");
- goto out;
+ return;
}
+
*winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list),
.fd = fd,
.tty_fd = tty_fd,
.pid = pid,
.tty = tty });
+
+ spin_lock(&winch_handler_lock);
list_add(&winch->list, &winch_handlers);
+ spin_unlock(&winch_handler_lock);
+
if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt,
SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM,
"winch", winch) < 0)
printk("register_winch_irq - failed to register IRQ\n");
- out:
- up(&winch_handler_sem);
}
static void unregister_winch(struct tty_struct *tty)
@@ -756,7 +759,7 @@ static void unregister_winch(struct tty_struct *tty)
struct list_head *ele;
struct winch *winch, *found = NULL;
- down(&winch_handler_sem);
+ spin_lock(&winch_handler_lock);
list_for_each(ele, &winch_handlers){
winch = list_entry(ele, struct winch, list);
if(winch->tty == tty){
@@ -764,20 +767,25 @@ static void unregister_winch(struct tty_struct *tty)
break;
}
}
-
if(found == NULL)
- goto out;
+ goto err;
+
+ list_del(&winch->list);
+ spin_unlock(&winch_handler_lock);
if(winch->pid != -1)
os_kill_process(winch->pid, 1);
free_irq(WINCH_IRQ, winch);
- list_del(&winch->list);
kfree(winch);
- out:
- up(&winch_handler_sem);
+
+ return;
+err:
+ spin_unlock(&winch_handler_lock);
}
+/* XXX: No lock as it's an exitcall... is this valid? Depending on cleanup
+ * order... are we sure that nothing else is done on the list? */
static void winch_cleanup(void)
{
struct list_head *ele;
@@ -786,6 +794,9 @@ static void winch_cleanup(void)
list_for_each(ele, &winch_handlers){
winch = list_entry(ele, struct winch, list);
if(winch->fd != -1){
+ /* Why is this different from the above free_irq(),
+ * which deactivates SIGIO? This searches the FD
+ * somewhere else and removes it from the list... */
deactivate_fd(winch->fd, WINCH_IRQ);
os_close_file(winch->fd);
}
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 404de41a4f67..c190c2414197 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -557,7 +557,7 @@ static int create_proc_mconsole(void)
ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL);
if(ent == NULL){
- printk("create_proc_mconsole : create_proc_entry failed\n");
+ printk(KERN_INFO "create_proc_mconsole : create_proc_entry failed\n");
return(0);
}
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
index a37a5ac13c22..022f67bb6873 100644
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -9,19 +9,11 @@
*
*/
-#include <linux/types.h>
-#include <linux/kdev_t.h>
-#include <linux/time.h>
-#include <linux/devfs_fs_kernel.h>
+#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/miscdevice.h>
#include <asm/uaccess.h>
-#include <asm/irq.h>
-#include <asm/pgtable.h>
#include "mem_user.h"
#include "user_util.h"
@@ -31,35 +23,22 @@ static unsigned long p_buf = 0;
static char *v_buf = NULL;
static ssize_t
-mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos)
+mmapper_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
- if(*ppos > mmapper_size)
- return -EINVAL;
-
- if(count + *ppos > mmapper_size)
- count = count + *ppos - mmapper_size;
-
- if(count < 0)
- return -EINVAL;
-
- copy_to_user(buf,&v_buf[*ppos],count);
-
- return count;
+ return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size);
}
static ssize_t
-mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+mmapper_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
- if(*ppos > mmapper_size)
+ if (*ppos > mmapper_size)
return -EINVAL;
- if(count + *ppos > mmapper_size)
- count = count + *ppos - mmapper_size;
-
- if(count < 0)
- return -EINVAL;
+ if (count > mmapper_size - *ppos)
+ count = mmapper_size - *ppos;
- copy_from_user(&v_buf[*ppos],buf,count);
+ if (copy_from_user(&v_buf[*ppos], buf, count))
+ return -EFAULT;
return count;
}
@@ -77,7 +56,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct * vma)
int ret = -EINVAL;
int size;
- lock_kernel();
if (vma->vm_pgoff != 0)
goto out;
@@ -92,7 +70,6 @@ mmapper_mmap(struct file *file, struct vm_area_struct * vma)
goto out;
ret = 0;
out:
- unlock_kernel();
return ret;
}
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 344b24d09a7c..e77a38da4350 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -35,6 +35,7 @@
#include "linux/blkpg.h"
#include "linux/genhd.h"
#include "linux/spinlock.h"
+#include "asm/atomic.h"
#include "asm/segment.h"
#include "asm/uaccess.h"
#include "asm/irq.h"
@@ -53,20 +54,21 @@
#include "mem.h"
#include "mem_kern.h"
#include "cow.h"
+#include "aio.h"
enum ubd_req { UBD_READ, UBD_WRITE };
struct io_thread_req {
- enum ubd_req op;
+ enum aio_type op;
int fds[2];
unsigned long offsets[2];
unsigned long long offset;
unsigned long length;
char *buffer;
int sectorsize;
- unsigned long sector_mask;
- unsigned long long cow_offset;
- unsigned long bitmap_words[2];
+ int bitmap_offset;
+ long bitmap_start;
+ long bitmap_end;
int error;
};
@@ -80,28 +82,31 @@ extern int create_cow_file(char *cow_file, char *backing_file,
unsigned long *bitmap_len_out,
int *data_offset_out);
extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
-extern void do_io(struct io_thread_req *req);
+extern void do_io(struct io_thread_req *req, struct request *r,
+ unsigned long *bitmap);
-static inline int ubd_test_bit(__u64 bit, unsigned char *data)
+static inline int ubd_test_bit(__u64 bit, void *data)
{
+ unsigned char *buffer = data;
__u64 n;
int bits, off;
- bits = sizeof(data[0]) * 8;
+ bits = sizeof(buffer[0]) * 8;
n = bit / bits;
off = bit % bits;
- return((data[n] & (1 << off)) != 0);
+ return((buffer[n] & (1 << off)) != 0);
}
-static inline void ubd_set_bit(__u64 bit, unsigned char *data)
+static inline void ubd_set_bit(__u64 bit, void *data)
{
+ unsigned char *buffer = data;
__u64 n;
int bits, off;
- bits = sizeof(data[0]) * 8;
+ bits = sizeof(buffer[0]) * 8;
n = bit / bits;
off = bit % bits;
- data[n] |= (1 << off);
+ buffer[n] |= (1 << off);
}
/*End stuff from ubd_user.h*/
@@ -110,8 +115,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
static DEFINE_SPINLOCK(ubd_io_lock);
static DEFINE_SPINLOCK(ubd_lock);
-static void (*do_ubd)(void);
-
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -158,6 +161,8 @@ struct cow {
int data_offset;
};
+#define MAX_SG 64
+
struct ubd {
char *file;
int count;
@@ -168,6 +173,7 @@ struct ubd {
int no_cow;
struct cow cow;
struct platform_device pdev;
+ struct scatterlist sg[MAX_SG];
};
#define DEFAULT_COW { \
@@ -460,80 +466,113 @@ __uml_help(fakehd,
);
static void do_ubd_request(request_queue_t * q);
-
-/* Only changed by ubd_init, which is an initcall. */
-int thread_fd = -1;
+static int in_ubd;
/* Changed by ubd_handler, which is serialized because interrupts only
* happen on CPU 0.
*/
int intr_count = 0;
-/* call ubd_finish if you need to serialize */
-static void __ubd_finish(struct request *req, int error)
+static void ubd_end_request(struct request *req, int bytes, int uptodate)
{
- int nsect;
-
- if(error){
- end_request(req, 0);
- return;
+ if (!end_that_request_first(req, uptodate, bytes >> 9)) {
+ add_disk_randomness(req->rq_disk);
+ end_that_request_last(req);
}
- nsect = req->current_nr_sectors;
- req->sector += nsect;
- req->buffer += nsect << 9;
- req->errors = 0;
- req->nr_sectors -= nsect;
- req->current_nr_sectors = 0;
- end_request(req, 1);
}
-static inline void ubd_finish(struct request *req, int error)
+/* call ubd_finish if you need to serialize */
+static void __ubd_finish(struct request *req, int bytes)
{
- spin_lock(&ubd_io_lock);
- __ubd_finish(req, error);
- spin_unlock(&ubd_io_lock);
+ if(bytes < 0){
+ ubd_end_request(req, 0, 0);
+ return;
+ }
+
+ ubd_end_request(req, bytes, 1);
}
-/* Called without ubd_io_lock held */
-static void ubd_handler(void)
+static inline void ubd_finish(struct request *req, int bytes)
{
- struct io_thread_req req;
- struct request *rq = elv_next_request(ubd_queue);
- int n;
-
- do_ubd = NULL;
- intr_count++;
- n = os_read_file(thread_fd, &req, sizeof(req));
- if(n != sizeof(req)){
- printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
- "err = %d\n", os_getpid(), -n);
- spin_lock(&ubd_io_lock);
- end_request(rq, 0);
- spin_unlock(&ubd_io_lock);
- return;
- }
-
- ubd_finish(rq, req.error);
- reactivate_fd(thread_fd, UBD_IRQ);
- do_ubd_request(ubd_queue);
+ spin_lock(&ubd_io_lock);
+ __ubd_finish(req, bytes);
+ spin_unlock(&ubd_io_lock);
}
+struct bitmap_io {
+ atomic_t count;
+ struct aio_context aio;
+};
+
+struct ubd_aio {
+ struct aio_context aio;
+ struct request *req;
+ int len;
+ struct bitmap_io *bitmap;
+ void *bitmap_buf;
+};
+
+static int ubd_reply_fd = -1;
+
static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
{
- ubd_handler();
- return(IRQ_HANDLED);
-}
+ struct aio_thread_reply reply;
+ struct ubd_aio *aio;
+ struct request *req;
+ int err, n, fd = (int) (long) dev;
+
+ while(1){
+ err = os_read_file(fd, &reply, sizeof(reply));
+ if(err == -EAGAIN)
+ break;
+ if(err < 0){
+ printk("ubd_aio_handler - read returned err %d\n",
+ -err);
+ break;
+ }
-/* Only changed by ubd_init, which is an initcall. */
-static int io_pid = -1;
+ aio = container_of(reply.data, struct ubd_aio, aio);
+ n = reply.err;
-void kill_io_thread(void)
-{
- if(io_pid != -1)
- os_kill_process(io_pid, 1);
-}
+ if(n == 0){
+ req = aio->req;
+ req->nr_sectors -= aio->len >> 9;
+
+ if((aio->bitmap != NULL) &&
+ (atomic_dec_and_test(&aio->bitmap->count))){
+ aio->aio = aio->bitmap->aio;
+ aio->len = 0;
+ kfree(aio->bitmap);
+ aio->bitmap = NULL;
+ submit_aio(&aio->aio);
+ }
+ else {
+ if((req->nr_sectors == 0) &&
+ (aio->bitmap == NULL)){
+ int len = req->hard_nr_sectors << 9;
+ ubd_finish(req, len);
+ }
+
+ if(aio->bitmap_buf != NULL)
+ kfree(aio->bitmap_buf);
+ kfree(aio);
+ }
+ }
+ else if(n < 0){
+ ubd_finish(aio->req, n);
+ if(aio->bitmap != NULL)
+ kfree(aio->bitmap);
+ if(aio->bitmap_buf != NULL)
+ kfree(aio->bitmap_buf);
+ kfree(aio);
+ }
+ }
+ reactivate_fd(fd, UBD_IRQ);
-__uml_exitcall(kill_io_thread);
+ do_ubd_request(ubd_queue);
+
+ return(IRQ_HANDLED);
+}
static int ubd_file_size(struct ubd *dev, __u64 *size_out)
{
@@ -569,7 +608,7 @@ static int ubd_open_dev(struct ubd *dev)
&dev->cow.data_offset, create_ptr);
if((dev->fd == -ENOENT) && create_cow){
- dev->fd = create_cow_file(dev->file, dev->cow.file,
+ dev->fd = create_cow_file(dev->file, dev->cow.file,
dev->openflags, 1 << 9, PAGE_SIZE,
&dev->cow.bitmap_offset,
&dev->cow.bitmap_len,
@@ -668,21 +707,22 @@ static int ubd_add(int n)
struct ubd *dev = &ubd_dev[n];
int err;
+ err = -ENODEV;
if(dev->file == NULL)
- return(-ENODEV);
+ goto out;
if (ubd_open_dev(dev))
- return(-ENODEV);
+ goto out;
err = ubd_file_size(dev, &dev->size);
if(err < 0)
- return(err);
+ goto out_close;
dev->size = ROUND_BLOCK(dev->size);
err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
if(err)
- return(err);
+ goto out_close;
if(fake_major != MAJOR_NR)
ubd_new_disk(fake_major, dev->size, n,
@@ -693,8 +733,11 @@ static int ubd_add(int n)
if (fake_ide)
make_ide_entries(ubd_gendisk[n]->disk_name);
+ err = 0;
+out_close:
ubd_close(dev);
- return 0;
+out:
+ return err;
}
static int ubd_config(char *str)
@@ -827,6 +870,10 @@ int ubd_init(void)
{
int i;
+ ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr);
+ if(ubd_reply_fd < 0)
+ printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd);
+
devfs_mk_dir("ubd");
if (register_blkdev(MAJOR_NR, "ubd"))
return -1;
@@ -837,6 +884,7 @@ int ubd_init(void)
return -1;
}
+ blk_queue_max_hw_segments(ubd_queue, MAX_SG);
if (fake_major != MAJOR_NR) {
char name[sizeof("ubd_nnn\0")];
@@ -848,40 +896,12 @@ int ubd_init(void)
driver_register(&ubd_driver);
for (i = 0; i < MAX_DEV; i++)
ubd_add(i);
+
return 0;
}
late_initcall(ubd_init);
-int ubd_driver_init(void){
- unsigned long stack;
- int err;
-
- /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
- if(global_openflags.s){
- printk(KERN_INFO "ubd: Synchronous mode\n");
- /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
- * enough. So use anyway the io thread. */
- }
- stack = alloc_stack(0, 0);
- io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
- &thread_fd);
- if(io_pid < 0){
- printk(KERN_ERR
- "ubd : Failed to start I/O thread (errno = %d) - "
- "falling back to synchronous I/O\n", -io_pid);
- io_pid = -1;
- return(0);
- }
- err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
- SA_INTERRUPT, "ubd", ubd_dev);
- if(err != 0)
- printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
- return(err);
-}
-
-device_initcall(ubd_driver_init);
-
static int ubd_open(struct inode *inode, struct file *filp)
{
struct gendisk *disk = inode->i_bdev->bd_disk;
@@ -919,105 +939,55 @@ static int ubd_release(struct inode * inode, struct file * file)
return(0);
}
-static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
- __u64 *cow_offset, unsigned long *bitmap,
- __u64 bitmap_offset, unsigned long *bitmap_words,
- __u64 bitmap_len)
+static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap)
{
- __u64 sector = io_offset >> 9;
- int i, update_bitmap = 0;
-
- for(i = 0; i < length >> 9; i++){
- if(cow_mask != NULL)
- ubd_set_bit(i, (unsigned char *) cow_mask);
- if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
- continue;
-
- update_bitmap = 1;
- ubd_set_bit(sector + i, (unsigned char *) bitmap);
- }
-
- if(!update_bitmap)
- return;
-
- *cow_offset = sector / (sizeof(unsigned long) * 8);
-
- /* This takes care of the case where we're exactly at the end of the
- * device, and *cow_offset + 1 is off the end. So, just back it up
- * by one word. Thanks to Lynn Kerby for the fix and James McMechan
- * for the original diagnosis.
- */
- if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
- sizeof(unsigned long) - 1))
- (*cow_offset)--;
-
- bitmap_words[0] = bitmap[*cow_offset];
- bitmap_words[1] = bitmap[*cow_offset + 1];
-
- *cow_offset *= sizeof(unsigned long);
- *cow_offset += bitmap_offset;
-}
+ __u64 sector = req->offset / req->sectorsize;
+ int i;
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
- __u64 bitmap_offset, __u64 bitmap_len)
-{
- __u64 sector = req->offset >> 9;
- int i;
+ for(i = 0; i < req->length / req->sectorsize; i++){
+ if(ubd_test_bit(sector + i, bitmap))
+ continue;
- if(req->length > (sizeof(req->sector_mask) * 8) << 9)
- panic("Operation too long");
+ if(req->bitmap_start == -1)
+ req->bitmap_start = sector + i;
+ req->bitmap_end = sector + i + 1;
- if(req->op == UBD_READ) {
- for(i = 0; i < req->length >> 9; i++){
- if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
- ubd_set_bit(i, (unsigned char *)
- &req->sector_mask);
- }
- }
- else cowify_bitmap(req->offset, req->length, &req->sector_mask,
- &req->cow_offset, bitmap, bitmap_offset,
- req->bitmap_words, bitmap_len);
+ ubd_set_bit(sector + i, bitmap);
+ }
}
/* Called with ubd_io_lock held */
-static int prepare_request(struct request *req, struct io_thread_req *io_req)
+static int prepare_request(struct request *req, struct io_thread_req *io_req,
+ unsigned long long offset, int page_offset,
+ int len, struct page *page)
{
struct gendisk *disk = req->rq_disk;
struct ubd *dev = disk->private_data;
- __u64 offset;
- int len;
-
- if(req->rq_status == RQ_INACTIVE) return(1);
/* This should be impossible now */
if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
printk("Write attempted on readonly ubd device %s\n",
disk->disk_name);
- end_request(req, 0);
+ ubd_end_request(req, 0, 0);
return(1);
}
- offset = ((__u64) req->sector) << 9;
- len = req->current_nr_sectors << 9;
-
io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
io_req->fds[1] = dev->fd;
- io_req->cow_offset = -1;
io_req->offset = offset;
io_req->length = len;
io_req->error = 0;
- io_req->sector_mask = 0;
-
- io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
+ io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE;
io_req->offsets[0] = 0;
io_req->offsets[1] = dev->cow.data_offset;
- io_req->buffer = req->buffer;
+ io_req->buffer = page_address(page) + page_offset;
io_req->sectorsize = 1 << 9;
+ io_req->bitmap_offset = dev->cow.bitmap_offset;
+ io_req->bitmap_start = -1;
+ io_req->bitmap_end = -1;
- if(dev->cow.file != NULL)
- cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
- dev->cow.bitmap_len);
-
+ if((dev->cow.file != NULL) && (io_req->op == AIO_WRITE))
+ cowify_bitmap(io_req, dev->cow.bitmap);
return(0);
}
@@ -1026,30 +996,36 @@ static void do_ubd_request(request_queue_t *q)
{
struct io_thread_req io_req;
struct request *req;
- int err, n;
-
- if(thread_fd == -1){
- while((req = elv_next_request(q)) != NULL){
- err = prepare_request(req, &io_req);
- if(!err){
- do_io(&io_req);
- __ubd_finish(req, io_req.error);
- }
- }
- }
- else {
- if(do_ubd || (req = elv_next_request(q)) == NULL)
- return;
- err = prepare_request(req, &io_req);
- if(!err){
- do_ubd = ubd_handler;
- n = os_write_file(thread_fd, (char *) &io_req,
- sizeof(io_req));
- if(n != sizeof(io_req))
- printk("write to io thread failed, "
- "errno = %d\n", -n);
+ __u64 sector;
+ int err;
+
+ if(in_ubd)
+ return;
+ in_ubd = 1;
+ while((req = elv_next_request(q)) != NULL){
+ struct gendisk *disk = req->rq_disk;
+ struct ubd *dev = disk->private_data;
+ int n, i;
+
+ blkdev_dequeue_request(req);
+
+ sector = req->sector;
+ n = blk_rq_map_sg(q, req, dev->sg);
+
+ for(i = 0; i < n; i++){
+ struct scatterlist *sg = &dev->sg[i];
+
+ err = prepare_request(req, &io_req, sector << 9,
+ sg->offset, sg->length,
+ sg->page);
+ if(err)
+ continue;
+
+ sector += sg->length >> 9;
+ do_io(&io_req, req, dev->cow.bitmap);
}
}
+ in_ubd = 0;
}
static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -1265,131 +1241,95 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
return(err);
}
-static int update_bitmap(struct io_thread_req *req)
-{
- int n;
-
- if(req->cow_offset == -1)
- return(0);
-
- n = os_seek_file(req->fds[1], req->cow_offset);
- if(n < 0){
- printk("do_io - bitmap lseek failed : err = %d\n", -n);
- return(1);
- }
-
- n = os_write_file(req->fds[1], &req->bitmap_words,
- sizeof(req->bitmap_words));
- if(n != sizeof(req->bitmap_words)){
- printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
- req->fds[1]);
- return(1);
- }
-
- return(0);
-}
-
-void do_io(struct io_thread_req *req)
+void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap)
{
- char *buf;
- unsigned long len;
- int n, nsectors, start, end, bit;
- int err;
- __u64 off;
-
- nsectors = req->length / req->sectorsize;
- start = 0;
- do {
- bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
- end = start;
- while((end < nsectors) &&
- (ubd_test_bit(end, (unsigned char *)
- &req->sector_mask) == bit))
- end++;
-
- off = req->offset + req->offsets[bit] +
- start * req->sectorsize;
- len = (end - start) * req->sectorsize;
- buf = &req->buffer[start * req->sectorsize];
-
- err = os_seek_file(req->fds[bit], off);
- if(err < 0){
- printk("do_io - lseek failed : err = %d\n", -err);
- req->error = 1;
- return;
- }
- if(req->op == UBD_READ){
- n = 0;
- do {
- buf = &buf[n];
- len -= n;
- n = os_read_file(req->fds[bit], buf, len);
- if (n < 0) {
- printk("do_io - read failed, err = %d "
- "fd = %d\n", -n, req->fds[bit]);
- req->error = 1;
- return;
- }
- } while((n < len) && (n != 0));
- if (n < len) memset(&buf[n], 0, len - n);
- } else {
- n = os_write_file(req->fds[bit], buf, len);
- if(n != len){
- printk("do_io - write failed err = %d "
- "fd = %d\n", -n, req->fds[bit]);
- req->error = 1;
- return;
- }
- }
+ struct ubd_aio *aio;
+ struct bitmap_io *bitmap_io = NULL;
+ char *buf;
+ void *bitmap_buf = NULL;
+ unsigned long len, sector;
+ int nsectors, start, end, bit, err;
+ __u64 off;
+
+ if(req->bitmap_start != -1){
+ /* Round up to the nearest word */
+ int round = sizeof(unsigned long);
+ len = (req->bitmap_end - req->bitmap_start +
+ round * 8 - 1) / (round * 8);
+ len *= round;
+
+ off = req->bitmap_start / (8 * round);
+ off *= round;
+
+ bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL);
+ if(bitmap_io == NULL){
+ printk("Failed to kmalloc bitmap IO\n");
+ req->error = 1;
+ return;
+ }
- start = end;
- } while(start < nsectors);
+ bitmap_buf = kmalloc(len, GFP_KERNEL);
+ if(bitmap_buf == NULL){
+ printk("do_io : kmalloc of bitmap chunk "
+ "failed\n");
+ kfree(bitmap_io);
+ req->error = 1;
+ return;
+ }
+ memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len);
+
+ *bitmap_io = ((struct bitmap_io)
+ { .count = ATOMIC_INIT(0),
+ .aio = INIT_AIO(AIO_WRITE, req->fds[1],
+ bitmap_buf, len,
+ req->bitmap_offset + off,
+ ubd_reply_fd) } );
+ }
- req->error = update_bitmap(req);
-}
+ nsectors = req->length / req->sectorsize;
+ start = 0;
+ end = nsectors;
+ bit = 0;
+ do {
+ if(bitmap != NULL){
+ sector = req->offset / req->sectorsize;
+ bit = ubd_test_bit(sector + start, bitmap);
+ end = start;
+ while((end < nsectors) &&
+ (ubd_test_bit(sector + end, bitmap) == bit))
+ end++;
+ }
-/* Changed in start_io_thread, which is serialized by being called only
- * from ubd_init, which is an initcall.
- */
-int kernel_fd = -1;
+ off = req->offsets[bit] + req->offset +
+ start * req->sectorsize;
+ len = (end - start) * req->sectorsize;
+ buf = &req->buffer[start * req->sectorsize];
-/* Only changed by the io thread */
-int io_count = 0;
+ aio = kmalloc(sizeof(*aio), GFP_KERNEL);
+ if(aio == NULL){
+ req->error = 1;
+ return;
+ }
-int io_thread(void *arg)
-{
- struct io_thread_req req;
- int n;
+ *aio = ((struct ubd_aio)
+ { .aio = INIT_AIO(req->op, req->fds[bit], buf,
+ len, off, ubd_reply_fd),
+ .len = len,
+ .req = r,
+ .bitmap = bitmap_io,
+ .bitmap_buf = bitmap_buf });
+
+ if(aio->bitmap != NULL)
+ atomic_inc(&aio->bitmap->count);
+
+ err = submit_aio(&aio->aio);
+ if(err){
+ printk("do_io - submit_aio failed, "
+ "err = %d\n", err);
+ req->error = 1;
+ return;
+ }
- ignore_sigwinch_sig();
- while(1){
- n = os_read_file(kernel_fd, &req, sizeof(req));
- if(n != sizeof(req)){
- if(n < 0)
- printk("io_thread - read failed, fd = %d, "
- "err = %d\n", kernel_fd, -n);
- else {
- printk("io_thread - short read, fd = %d, "
- "length = %d\n", kernel_fd, n);
- }
- continue;
- }
- io_count++;
- do_io(&req);
- n = os_write_file(kernel_fd, &req, sizeof(req));
- if(n != sizeof(req))
- printk("io_thread - write failed, fd = %d, err = %d\n",
- kernel_fd, -n);
- }
+ start = end;
+ } while(start < nsectors);
}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h
new file mode 100644
index 000000000000..83f16877ab08
--- /dev/null
+++ b/arch/um/include/aio.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef AIO_H__
+#define AIO_H__
+
+enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP };
+
+struct aio_thread_reply {
+ void *data;
+ int err;
+};
+
+struct aio_context {
+ enum aio_type type;
+ int fd;
+ void *data;
+ int len;
+ unsigned long long offset;
+ int reply_fd;
+ struct aio_context *next;
+};
+
+#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \
+ aio_reply_fd) \
+ { .type = aio_type, \
+ .fd = aio_fd, \
+ .data = aio_data, \
+ .len = aio_len, \
+ .offset = aio_offset, \
+ .reply_fd = aio_reply_fd }
+
+#define INIT_AIO_CONTEXT { .reply_fd = -1, \
+ .next = NULL }
+
+extern int submit_aio(struct aio_context *aio);
+
+#endif
diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h
index d705daa2d854..0aa620970adb 100644
--- a/arch/um/include/common-offsets.h
+++ b/arch/um/include/common-offsets.h
@@ -12,3 +12,4 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING);
DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
DEFINE_STR(UM_KERN_INFO, KERN_INFO);
DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
+DEFINE(HOST_ELF_CLASS, ELF_CLASS);
diff --git a/arch/um/include/init.h b/arch/um/include/init.h
index 55c2693f8778..cbd79a8d213d 100644
--- a/arch/um/include/init.h
+++ b/arch/um/include/init.h
@@ -111,7 +111,15 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
#ifndef __KERNEL__
-#define __initcall(fn) static initcall_t __initcall_##fn __init_call = fn
+#define __define_initcall(level,fn) \
+ static initcall_t __initcall_##fn __attribute_used__ \
+ __attribute__((__section__(".initcall" level ".init"))) = fn
+
+/* Userspace initcalls shouldn't depend on anything in the kernel, so we'll
+ * make them run first.
+ */
+#define __initcall(fn) __define_initcall("1", fn)
+
#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
index 3af52a634c4c..c222d56b1494 100644
--- a/arch/um/include/irq_kern.h
+++ b/arch/um/include/irq_kern.h
@@ -7,12 +7,15 @@
#define __IRQ_KERN_H__
#include "linux/interrupt.h"
+#include "asm/ptrace.h"
extern int um_request_irq(unsigned int irq, int fd, int type,
irqreturn_t (*handler)(int, void *,
struct pt_regs *),
unsigned long irqflags, const char * devname,
void *dev_id);
+extern int init_aio_irq(int irq, char *name,
+ irqreturn_t (*handler)(int, void *, struct pt_regs *));
#endif
diff --git a/arch/um/include/mem.h b/arch/um/include/mem.h
index 10c46c38949a..e8ff0d8fa610 100644
--- a/arch/um/include/mem.h
+++ b/arch/um/include/mem.h
@@ -14,6 +14,17 @@ extern int is_remapped(void *virt);
extern int physmem_remove_mapping(void *virt);
extern void physmem_forget_descriptor(int fd);
+extern unsigned long uml_physmem;
+static inline unsigned long to_phys(void *virt)
+{
+ return(((unsigned long) virt) - uml_physmem);
+}
+
+static inline void *to_virt(unsigned long phys)
+{
+ return((void *) uml_physmem + phys);
+}
+
#endif
/*
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 881d2988d2d8..4c362458052c 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -153,6 +153,11 @@ extern int os_file_type(char *file);
extern int os_file_mode(char *file, struct openflags *mode_out);
extern int os_lock_file(int fd, int excl);
+/* start_up.c */
+extern void os_early_checks(void);
+extern int can_do_skas(void);
+
+/* process.c */
extern unsigned long os_process_pc(int pid);
extern int os_process_parent(int pid);
extern void os_stop_process(int pid);
@@ -161,6 +166,9 @@ extern void os_kill_ptraced_process(int pid, int reap_child);
extern void os_usr1_process(int pid);
extern int os_getpid(void);
extern int os_getpgrp(void);
+extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int));
+extern void init_new_thread_signals(int altstack);
+extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr);
extern int os_map_memory(void *virt, int fd, unsigned long long off,
unsigned long len, int r, int w, int x);
@@ -170,6 +178,13 @@ extern int os_unmap_memory(void *addr, int len);
extern void os_flush_stdout(void);
extern unsigned long long os_usecs(void);
+/* tt.c
+ * for tt mode only (will be deleted in future...)
+ */
+extern void forward_pending_sigio(int target);
+extern int start_fork_tramp(void *arg, unsigned long temp_stack,
+ int clone_flags, int (*tramp)(void *));
+
#endif
/*
diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h
index 8744abb5224f..0a35e6d0baa0 100644
--- a/arch/um/include/registers.h
+++ b/arch/um/include/registers.h
@@ -14,6 +14,7 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs);
extern void save_registers(int pid, union uml_pt_regs *regs);
extern void restore_registers(int pid, union uml_pt_regs *regs);
extern void init_registers(int pid);
+extern void get_safe_registers(unsigned long * regs);
#endif
diff --git a/arch/um/include/syscall.h b/arch/um/include/syscall.h
new file mode 100644
index 000000000000..dda1df901a08
--- /dev/null
+++ b/arch/um/include/syscall.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSCALL_USER_H
+#define __SYSCALL_USER_H
+
+extern int record_syscall_start(int syscall);
+extern void record_syscall_end(int index, long result);
+
+#endif
diff --git a/arch/um/include/syscall_user.h b/arch/um/include/syscall_user.h
deleted file mode 100644
index 811d0ec2445e..000000000000
--- a/arch/um/include/syscall_user.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SYSCALL_USER_H
-#define __SYSCALL_USER_H
-
-extern int record_syscall_start(int syscall);
-extern void record_syscall_end(int index, long result);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/sysdep-i386/ptrace_user.h b/arch/um/include/sysdep-i386/ptrace_user.h
index eca8066e7a43..899aa4b2a78d 100644
--- a/arch/um/include/sysdep-i386/ptrace_user.h
+++ b/arch/um/include/sysdep-i386/ptrace_user.h
@@ -20,11 +20,24 @@
#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX)
#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI)
#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI)
+#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP)
#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX)
+#define REGS_SYSCALL_NR EAX /* This is used before a system call */
+#define REGS_SYSCALL_ARG1 EBX
+#define REGS_SYSCALL_ARG2 ECX
+#define REGS_SYSCALL_ARG3 EDX
+#define REGS_SYSCALL_ARG4 ESI
+#define REGS_SYSCALL_ARG5 EDI
+#define REGS_SYSCALL_ARG6 EBP
+
+#define REGS_IP_INDEX EIP
+#define REGS_SP_INDEX UESP
+
#define PT_IP_OFFSET PT_OFFSET(EIP)
#define PT_IP(regs) ((regs)[EIP])
+#define PT_SP_OFFSET PT_OFFSET(UESP)
#define PT_SP(regs) ((regs)[UESP])
#ifndef FRAME_SIZE
diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h
new file mode 100644
index 000000000000..d3699fe1c613
--- /dev/null
+++ b/arch/um/include/sysdep-i386/stub.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_STUB_H
+#define __SYSDEP_STUB_H
+
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+extern void stub_segv_handler(int sig);
+extern void stub_clone_handler(void);
+
+#define STUB_SYSCALL_RET EAX
+#define STUB_MMAP_NR __NR_mmap2
+#define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT)
+
+static inline long stub_syscall2(long syscall, long arg1, long arg2)
+{
+ long ret;
+
+ __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx");
+ __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx");
+ __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax");
+ __asm__("int $0x80;" : : : "%eax");
+ __asm__ __volatile__("movl %%eax, %0; " : "=g" (ret) :);
+ return(ret);
+}
+
+static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+{
+ __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx");
+ return(stub_syscall2(syscall, arg1, arg2));
+}
+
+static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
+ long arg4)
+{
+ __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi");
+ return(stub_syscall3(syscall, arg1, arg2, arg3));
+}
+
+static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6)
+{
+ long ret;
+ __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax");
+ __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx");
+ __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx");
+ __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx");
+ __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi");
+ __asm__("movl %0, %%edi; " : : "g" (arg5) : "%edi");
+ __asm__ __volatile__("pushl %%ebp ; movl %1, %%ebp; "
+ "int $0x80; popl %%ebp ; "
+ "movl %%eax, %0; " : "=g" (ret) : "g" (arg6) : "%eax");
+ return(ret);
+}
+
+static inline void trap_myself(void)
+{
+ __asm("int3");
+}
+
+#endif
diff --git a/arch/um/include/sysdep-i386/syscalls.h b/arch/um/include/sysdep-i386/syscalls.h
index be0a3e3469eb..a0d5b74d3731 100644
--- a/arch/um/include/sysdep-i386/syscalls.h
+++ b/arch/um/include/sysdep-i386/syscalls.h
@@ -16,6 +16,8 @@ extern syscall_handler_t sys_rt_sigaction;
extern syscall_handler_t old_mmap_i386;
+extern syscall_handler_t *sys_call_table[];
+
#define EXECUTE_SYSCALL(syscall, regs) \
((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs))
diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h
index be8acd5efd97..331aa2d1f3f5 100644
--- a/arch/um/include/sysdep-x86_64/ptrace.h
+++ b/arch/um/include/sysdep-x86_64/ptrace.h
@@ -227,7 +227,7 @@ struct syscall_args {
panic("Bad register in UPT_SET : %d\n", reg); \
break; \
} \
- val; \
+ __upt_val; \
})
#define UPT_SET_SYSCALL_RETURN(r, res) \
diff --git a/arch/um/include/sysdep-x86_64/ptrace_user.h b/arch/um/include/sysdep-x86_64/ptrace_user.h
index 31729973fb14..128faf027364 100644
--- a/arch/um/include/sysdep-x86_64/ptrace_user.h
+++ b/arch/um/include/sysdep-x86_64/ptrace_user.h
@@ -55,6 +55,20 @@
#define PTRACE_OLDSETOPTIONS 21
#endif
+/* These are before the system call, so the system call number is RAX
+ * rather than ORIG_RAX, and arg4 is R10 rather than RCX
+ */
+#define REGS_SYSCALL_NR PT_INDEX(RAX)
+#define REGS_SYSCALL_ARG1 PT_INDEX(RDI)
+#define REGS_SYSCALL_ARG2 PT_INDEX(RSI)
+#define REGS_SYSCALL_ARG3 PT_INDEX(RDX)
+#define REGS_SYSCALL_ARG4 PT_INDEX(R10)
+#define REGS_SYSCALL_ARG5 PT_INDEX(R8)
+#define REGS_SYSCALL_ARG6 PT_INDEX(R9)
+
+#define REGS_IP_INDEX PT_INDEX(RIP)
+#define REGS_SP_INDEX PT_INDEX(RSP)
+
#endif
/*
diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h
new file mode 100644
index 000000000000..f599058d8263
--- /dev/null
+++ b/arch/um/include/sysdep-x86_64/stub.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSDEP_STUB_H
+#define __SYSDEP_STUB_H
+
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+#include <sysdep/ptrace_user.h>
+
+extern void stub_segv_handler(int sig);
+extern void stub_clone_handler(void);
+
+#define STUB_SYSCALL_RET PT_INDEX(RAX)
+#define STUB_MMAP_NR __NR_mmap
+#define MMAP_OFFSET(o) (o)
+
+static inline long stub_syscall2(long syscall, long arg1, long arg2)
+{
+ long ret;
+
+ __asm__("movq %0, %%rsi; " : : "g" (arg2) : "%rsi");
+ __asm__("movq %0, %%rdi; " : : "g" (arg1) : "%rdi");
+ __asm__("movq %0, %%rax; " : : "g" (syscall) : "%rax");
+ __asm__("syscall;" : : : "%rax", "%r11", "%rcx");
+ __asm__ __volatile__("movq %%rax, %0; " : "=g" (ret) :);
+ return(ret);
+}
+
+static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+{
+ __asm__("movq %0, %%rdx; " : : "g" (arg3) : "%rdx");
+ return(stub_syscall2(syscall, arg1, arg2));
+}
+
+static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
+ long arg4)
+{
+ __asm__("movq %0, %%r10; " : : "g" (arg4) : "%r10");
+ return(stub_syscall3(syscall, arg1, arg2, arg3));
+}
+
+static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6)
+{
+ __asm__("movq %0, %%r9; " : : "g" (arg6) : "%r9");
+ __asm__("movq %0, %%r8; " : : "g" (arg5) : "%r8");
+ return(stub_syscall4(syscall, arg1, arg2, arg3, arg4));
+}
+
+static inline void trap_myself(void)
+{
+ __asm("int3");
+}
+
+#endif
diff --git a/arch/um/include/sysdep-x86_64/syscalls.h b/arch/um/include/sysdep-x86_64/syscalls.h
index 67923cca5691..e06f83e80f4a 100644
--- a/arch/um/include/sysdep-x86_64/syscalls.h
+++ b/arch/um/include/sysdep-x86_64/syscalls.h
@@ -14,6 +14,8 @@ typedef long syscall_handler_t(void);
extern syscall_handler_t *ia32_sys_call_table[];
+extern syscall_handler_t *sys_call_table[];
+
#define EXECUTE_SYSCALL(syscall, regs) \
(((long (*)(long, long, long, long, long, long)) \
(*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \
diff --git a/arch/um/include/time_user.h b/arch/um/include/time_user.h
index f64ef77019a3..17d7ef2141f4 100644
--- a/arch/um/include/time_user.h
+++ b/arch/um/include/time_user.h
@@ -10,6 +10,7 @@ extern void timer(void);
extern void switch_timers(int to_real);
extern void idle_sleep(int secs);
extern void enable_timer(void);
+extern void prepare_timer(void * ptr);
extern void disable_timer(void);
extern unsigned long time_lock(void);
extern void time_unlock(unsigned long);
diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h
index da1097285b8c..45d7da6c3b2c 100644
--- a/arch/um/include/tlb.h
+++ b/arch/um/include/tlb.h
@@ -9,7 +9,7 @@
#include "um_mmu.h"
struct host_vm_op {
- enum { MMAP, MUNMAP, MPROTECT } type;
+ enum { NONE, MMAP, MUNMAP, MPROTECT } type;
union {
struct {
unsigned long addr;
@@ -37,31 +37,11 @@ struct host_vm_op {
extern void mprotect_kernel_vm(int w);
extern void force_flush_all(void);
extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force, int data,
- void (*do_ops)(int, struct host_vm_op *, int));
+ unsigned long end_addr, int force,
+ int (*do_ops)(union mm_context *,
+ struct host_vm_op *, int, int,
+ void **));
extern int flush_tlb_kernel_range_common(unsigned long start,
unsigned long end);
-extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
- int r, int w, int x, struct host_vm_op *ops, int index,
- int last_filled, int data,
- void (*do_ops)(int, struct host_vm_op *, int));
-extern int add_munmap(unsigned long addr, unsigned long len,
- struct host_vm_op *ops, int index, int last_filled,
- int data, void (*do_ops)(int, struct host_vm_op *, int));
-extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
- int x, struct host_vm_op *ops, int index,
- int last_filled, int data,
- void (*do_ops)(int, struct host_vm_op *, int));
#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h
index 6e348cb6de24..84c0868cd561 100644
--- a/arch/um/include/um_uaccess.h
+++ b/arch/um/include/um_uaccess.h
@@ -20,13 +20,6 @@
#define access_ok(type, addr, size) \
CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size)
-/* this function will go away soon - use access_ok() instead */
-static inline int __deprecated verify_area(int type, const void __user *addr, unsigned long size)
-{
- return (CHOOSE_MODE_PROC(verify_area_tt, verify_area_skas, type, addr,
- size));
-}
-
static inline int copy_from_user(void *to, const void __user *from, int n)
{
return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to,
diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h
index 7b6a24dfd302..bb505e01d994 100644
--- a/arch/um/include/user_util.h
+++ b/arch/um/include/user_util.h
@@ -54,8 +54,6 @@ extern void stack_protections(unsigned long address);
extern void task_protections(unsigned long address);
extern int wait_for_stop(int pid, int sig, int cont_type, void *relay);
extern void *add_signal_handler(int sig, void (*handler)(int));
-extern int start_fork_tramp(void *arg, unsigned long temp_stack,
- int clone_flags, int (*tramp)(void *));
extern int linux_main(int argc, char **argv);
extern void set_cmdline(char *cmd);
extern void input_cb(void (*proc)(void *), void *arg, int arg_len);
@@ -64,8 +62,6 @@ extern void *um_kmalloc(int size);
extern int switcheroo(int fd, int prot, void *from, void *to, int size);
extern void setup_machinename(char *machine_out);
extern void setup_hostinfo(void);
-extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int));
-extern void init_new_thread_signals(int altstack);
extern void do_exec(int old_pid, int new_pid);
extern void tracer_panic(char *msg, ...);
extern char *get_umid(int only_if_set);
@@ -74,16 +70,12 @@ extern int detach(int pid, int sig);
extern int attach(int pid);
extern void kill_child_dead(int pid);
extern int cont(int pid);
-extern void check_ptrace(void);
extern void check_sigio(void);
-extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr);
extern void write_sigio_workaround(void);
extern void arch_check_bugs(void);
extern int cpu_feature(char *what, char *buf, int len);
extern int arch_handle_signal(int sig, union uml_pt_regs *regs);
extern int arch_fixup(unsigned long address, void *sc_ptr);
-extern void forward_pending_sigio(int target);
-extern int can_do_skas(void);
extern void arch_init_thread(void);
extern int setjmp_wrapper(void (*proc)(void *, void *), ...);
extern int raw(int fd);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index a8918e80df96..614b8ebeb0ed 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -8,25 +8,24 @@ clean-files :=
obj-y = config.o exec_kern.o exitcode.o \
helper.o init_task.o irq.o irq_user.o ksyms.o main.o mem.o mem_user.o \
- physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \
- sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \
- syscall_kern.o sysrq.o tempfile.o time.o time_kern.o \
- tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \
- user_util.o
+ physmem.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \
+ sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o \
+ tempfile.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \
+ uaccess_user.o um_arch.o umid.o user_util.o
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_GCOV) += gmon_syms.o
obj-$(CONFIG_TTY_LOG) += tty_log.o
-obj-$(CONFIG_SYSCALL_DEBUG) += syscall_user.o
+obj-$(CONFIG_SYSCALL_DEBUG) += syscall.o
obj-$(CONFIG_MODE_TT) += tt/
obj-$(CONFIG_MODE_SKAS) += skas/
user-objs-$(CONFIG_TTY_LOG) += tty_log.o
-USER_OBJS := $(user-objs-y) config.o helper.o main.o process.o tempfile.o \
- time.o tty_log.o umid.o user_util.o
+USER_OBJS := $(user-objs-y) config.o helper.o main.o tempfile.o time.o \
+ tty_log.o umid.o user_util.o
include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
new file mode 100644
index 000000000000..c13a64a288f6
--- /dev/null
+++ b/arch/um/kernel/asm-offsets.c
@@ -0,0 +1 @@
+/* Dummy file to make kbuild happy - unused! */
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 715b0838a68c..2517ecb8bf27 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -67,6 +67,12 @@ SECTIONS
*(.stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
+
+ . = ALIGN(4096);
+ __syscall_stub_start = .;
+ *(.__syscall_stub*)
+ __syscall_stub_end = .;
+ . = ALIGN(4096);
} =0x90909090
.fini : {
KEEP (*(.fini))
@@ -140,37 +146,8 @@ SECTIONS
}
_end = .;
PROVIDE (end = .);
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
- /* DWARF debug sections.
- Symbols in the DWARF debugging sections are relative to the beginning
- of the section so we begin them at 0. */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
+
+ STABS_DEBUG
+
+ DWARF_DEBUG
}
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 0ea87f24b36f..d21ebad666b4 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -48,7 +48,7 @@ static int make_proc_exitcode(void)
ent = create_proc_entry("exitcode", 0600, &proc_root);
if(ent == NULL){
- printk("make_proc_exitcode : Failed to register "
+ printk(KERN_WARNING "make_proc_exitcode : Failed to register "
"/proc/exitcode\n");
return(0);
}
diff --git a/arch/um/kernel/helper.c b/arch/um/kernel/helper.c
index 13b1f5c2f7ee..f83e1e8e2392 100644
--- a/arch/um/kernel/helper.c
+++ b/arch/um/kernel/helper.c
@@ -13,6 +13,7 @@
#include "user.h"
#include "kern_util.h"
#include "user_util.h"
+#include "helper.h"
#include "os.h"
struct helper_data {
@@ -149,7 +150,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
return(pid);
}
-int helper_wait(int pid, int block)
+int helper_wait(int pid)
{
int ret;
@@ -160,14 +161,3 @@ int helper_wait(int pid, int block)
}
return(ret);
}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9f18061ef4c9..dcd814971995 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -31,7 +31,7 @@
#include "kern_util.h"
#include "irq_user.h"
#include "irq_kern.h"
-
+#include "os.h"
/*
* Generic, controller-independent functions:
@@ -168,13 +168,32 @@ void __init init_IRQ(void)
}
}
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+/*
+ * Create a pipe and register its first descriptor (fds[0]) with
+ * um_request_irq() as an IRQ_READ source for 'irq', using 'handler' and
+ * 'name'.  fds[0] is also passed, cast to a pointer, as the last argument
+ * of um_request_irq().
+ *
+ * Returns the second pipe descriptor (fds[1]) on success.  On failure the
+ * nonzero value from os_pipe() or um_request_irq() is returned instead; if
+ * it is um_request_irq() that fails, both pipe descriptors are closed first.
+ */
+int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
+ struct pt_regs *))
+{
+ int fds[2], err;
+
+ err = os_pipe(fds, 1, 1);
+ if(err){
+ /* negated for printing - presumably os_pipe returns -errno; confirm */
+ printk("init_aio_irq - os_pipe failed, err = %d\n", -err);
+ goto out;
+ }
+
+ err = um_request_irq(irq, fds[0], IRQ_READ, handler,
+ SA_INTERRUPT | SA_SAMPLE_RANDOM, name,
+ (void *) (long) fds[0]);
+ if(err){
+ printk("init_aio_irq - : um_request_irq failed, err = %d\n",
+ err);
+ goto out_close;
+ }
+
+ /* success: hand the other end of the pipe back to the caller */
+ err = fds[1];
+ goto out;
+
+ out_close:
+ os_close_file(fds[0]);
+ os_close_file(fds[1]);
+ out:
+ return(err);
+}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 99439fa15ef4..a97a72e516aa 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -34,14 +34,9 @@ EXPORT_SYMBOL(host_task_size);
EXPORT_SYMBOL(arch_validate);
EXPORT_SYMBOL(get_kmem_end);
-EXPORT_SYMBOL(page_to_phys);
-EXPORT_SYMBOL(phys_to_page);
EXPORT_SYMBOL(high_physmem);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(um_virt_to_phys);
-EXPORT_SYMBOL(__virt_to_page);
-EXPORT_SYMBOL(to_phys);
-EXPORT_SYMBOL(to_virt);
EXPORT_SYMBOL(mode_tt);
EXPORT_SYMBOL(handle_page_fault);
EXPORT_SYMBOL(find_iomem);
@@ -114,22 +109,3 @@ extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__read_lock_failed);
#endif
-
-#ifdef CONFIG_HIGHMEM
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c
index 1e1a87f1c510..d31027f0fe39 100644
--- a/arch/um/kernel/main.c
+++ b/arch/um/kernel/main.c
@@ -97,7 +97,7 @@ int main(int argc, char **argv, char **envp)
exit(1);
}
-#ifdef UML_CONFIG_MODE_TT
+#ifdef UML_CONFIG_CMDLINE_ON_HOST
/* Allocate memory for thread command lines */
if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 5597bd39e6b5..64fa062cc119 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -196,7 +196,7 @@ static void init_highmem(void)
static void __init fixaddr_user_init( void)
{
-#if CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
+#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA
long size = FIXADDR_USER_END - FIXADDR_USER_START;
pgd_t *pgd;
pud_t *pud;
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 420e6d51fa0f..ea670fcc8af5 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -248,16 +248,6 @@ unsigned long high_physmem;
extern unsigned long physmem_size;
-void *to_virt(unsigned long phys)
-{
- return((void *) uml_physmem + phys);
-}
-
-unsigned long to_phys(void *virt)
-{
- return(((unsigned long) virt) - uml_physmem);
-}
-
int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
{
struct page *p, *map;
@@ -298,31 +288,6 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
return(0);
}
-struct page *phys_to_page(const unsigned long phys)
-{
- return(&mem_map[phys >> PAGE_SHIFT]);
-}
-
-struct page *__virt_to_page(const unsigned long virt)
-{
- return(&mem_map[__pa(virt) >> PAGE_SHIFT]);
-}
-
-phys_t page_to_phys(struct page *page)
-{
- return((page - mem_map) << PAGE_SHIFT);
-}
-
-pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
- pte_t pte;
-
- pte_set_val(pte, page_to_phys(page), pgprot);
- if(pte_present(pte))
- pte_mknewprot(pte_mknewpage(pte));
- return(pte);
-}
-
/* Changed during early boot */
static unsigned long kmem_top = 0;
@@ -353,6 +318,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+extern int __syscall_stub_start, __binary_start;
+
void setup_physmem(unsigned long start, unsigned long reserve_end,
unsigned long len, unsigned long highmem)
{
@@ -371,6 +338,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end,
exit(1);
}
+ /* Special kludge - This page will be mapped in to userspace processes
+ * from physmem_fd, so it needs to be written out there.
+ */
+ os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
+ os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+
bootmap_size = init_bootmem(pfn, pfn + delta);
free_bootmem(__pa(reserve_end) + bootmap_size,
len - bootmap_size - reserve);
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index d4036ed680bc..c23d8a08d0ff 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -412,7 +412,7 @@ int __init make_proc_sysemu(void)
if (ent == NULL)
{
- printk("Failed to register /proc/sysemu\n");
+ printk(KERN_WARNING "Failed to register /proc/sysemu\n");
return(0);
}
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index fcec51da1d37..a637e885c583 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -49,23 +49,17 @@ void machine_restart(char * __unused)
CHOOSE_MODE(reboot_tt(), reboot_skas());
}
-EXPORT_SYMBOL(machine_restart);
-
void machine_power_off(void)
{
uml_cleanup();
CHOOSE_MODE(halt_tt(), halt_skas());
}
-EXPORT_SYMBOL(machine_power_off);
-
void machine_halt(void)
{
machine_power_off();
}
-EXPORT_SYMBOL(machine_halt);
-
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c
index 7807a3e8c426..03618bd13d55 100644
--- a/arch/um/kernel/signal_kern.c
+++ b/arch/um/kernel/signal_kern.c
@@ -87,12 +87,12 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr,
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
force_sigsegv(signr, current);
- }
- else if(!(ka->sa.sa_flags & SA_NODEFER)){
+ } else {
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked,
&ka->sa.sa_mask);
- sigaddset(&current->blocked, signr);
+ if(!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, signr);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
}
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index ff69c4b312c0..db36c7c95940 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -3,11 +3,14 @@
# Licensed under the GPL
#
-obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \
- syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \
+obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \
+ syscall.o tlb.o trap_user.o uaccess.o
subdir- := util
-USER_OBJS := process.o
+USER_OBJS := process.o clone.o
include arch/um/scripts/Makefile.rules
+
+# clone.o is in the stub, so it can't be built with profiling
+$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS))
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
new file mode 100644
index 000000000000..4dc55f10cd18
--- /dev/null
+++ b/arch/um/kernel/skas/clone.c
@@ -0,0 +1,44 @@
+#include <sched.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include "ptrace_user.h"
+#include "skas.h"
+#include "stub-data.h"
+#include "uml-config.h"
+#include "sysdep/stub.h"
+
+/* This is in a separate file because it needs to be compiled with any
+ * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
+ */
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_clone_handler(void)
+{
+ long err;
+ /* the shared parameter/result block lives at UML_CONFIG_STUB_DATA */
+ struct stub_data *from = (struct stub_data *) UML_CONFIG_STUB_DATA;
+
+ /* clone; the second argument (the child's stack pointer) is placed
+  * one pointer below the middle of the page at UML_CONFIG_STUB_DATA.
+  * Any nonzero return skips the remaining set-up.
+  */
+ err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
+ UML_CONFIG_STUB_DATA + PAGE_SIZE / 2 -
+ sizeof(void *));
+ if(err != 0)
+ goto out;
+
+ /* from here on clone returned 0, i.e. this is the new child */
+ err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
+ if(err)
+ goto out;
+
+ /* install the virtual interval timer supplied in from->timer */
+ err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL,
+ (long) &from->timer, 0);
+ if(err)
+ goto out;
+
+ /* map one shared, writable page from from->fd at from->offset over
+  * the fixed address UML_CONFIG_STUB_DATA
+  */
+ err = stub_syscall6(STUB_MMAP_NR, UML_CONFIG_STUB_DATA, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
+ from->fd, from->offset);
+ out:
+ /* save current result. Parent: pid; child: retcode of mmap */
+ /* (in the child this can also be the nonzero result of the ptrace or
+  * setitimer call, since both jump here on failure)
+  */
+ from->err = err;
+ trap_myself();
+}
diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c
index c6b4d5dba789..77ed7bbab219 100644
--- a/arch/um/kernel/skas/exec_kern.c
+++ b/arch/um/kernel/skas/exec_kern.c
@@ -18,7 +18,7 @@
void flush_thread_skas(void)
{
force_flush_all();
- switch_mm_skas(current->mm->context.skas.mm_fd);
+ switch_mm_skas(&current->mm->context.skas.id);
}
void start_thread_skas(struct pt_regs *regs, unsigned long eip,
diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h
new file mode 100644
index 000000000000..48dd0989ddaa
--- /dev/null
+++ b/arch/um/kernel/skas/include/mm_id.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __MM_ID_H
+#define __MM_ID_H
+
+/* Handle naming one address space for the map/unmap/protect helpers. */
+struct mm_id {
+ union {
+ /* used as the descriptor the proc_mm_op requests are written to
+  * when proc_mm is set
+  */
+ int mm_fd;
+ /* used as the pid whose registers are set to run the syscall stub */
+ int pid;
+ } u;
+ /* address through which the stub's result words and the queued
+  * syscall records are read and written - appears to be the
+  * kernel-side view of the stub data page; confirm against callers
+  */
+ unsigned long stack;
+};
+
+#endif
diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h
index 4cd60d7213f3..09536f81ee42 100644
--- a/arch/um/kernel/skas/include/mmu-skas.h
+++ b/arch/um/kernel/skas/include/mmu-skas.h
@@ -6,10 +6,19 @@
#ifndef __SKAS_MMU_H
#define __SKAS_MMU_H
+#include "linux/config.h"
+#include "mm_id.h"
+
struct mmu_context_skas {
- int mm_fd;
+ struct mm_id id;
+ unsigned long last_page_table;
+#ifdef CONFIG_3_LEVEL_PGTABLES
+ unsigned long last_pmd;
+#endif
};
+extern void switch_mm_skas(struct mm_id * mm_idp);
+
#endif
/*
diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h
index 96b51dba3471..060934740f9f 100644
--- a/arch/um/kernel/skas/include/skas.h
+++ b/arch/um/kernel/skas/include/skas.h
@@ -6,9 +6,11 @@
#ifndef __SKAS_H
#define __SKAS_H
+#include "mm_id.h"
#include "sysdep/ptrace.h"
extern int userspace_pid[];
+extern int proc_mm, ptrace_faultinfo;
extern void switch_threads(void *me, void *next);
extern void thread_wait(void *sw, void *fb);
@@ -22,26 +24,26 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig));
extern void remove_sigstack(void);
extern void new_thread_handler(int sig);
extern void handle_syscall(union uml_pt_regs *regs);
-extern void map(int fd, unsigned long virt, unsigned long len, int r, int w,
- int x, int phys_fd, unsigned long long offset);
-extern int unmap(int fd, void *addr, unsigned long len);
-extern int protect(int fd, unsigned long addr, unsigned long len,
- int r, int w, int x);
+extern int map(struct mm_id * mm_idp, unsigned long virt,
+ unsigned long len, int r, int w, int x, int phys_fd,
+ unsigned long long offset, int done, void **data);
+extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len,
+ int done, void **data);
+extern int protect(struct mm_id * mm_idp, unsigned long addr,
+ unsigned long len, int r, int w, int x, int done,
+ void **data);
extern void user_signal(int sig, union uml_pt_regs *regs, int pid);
-extern int new_mm(int from);
-extern void start_userspace(int cpu);
+extern int new_mm(int from, unsigned long stack);
+extern int start_userspace(unsigned long stub_stack);
+extern int copy_context_skas0(unsigned long stack, int pid);
extern void get_skas_faultinfo(int pid, struct faultinfo * fi);
extern long execute_syscall_skas(void *r);
+extern unsigned long current_stub_stack(void);
+extern long run_syscall_stub(struct mm_id * mm_idp,
+ int syscall, unsigned long *args, long expected,
+ void **addr, int done);
+extern long syscall_stub_data(struct mm_id * mm_idp,
+ unsigned long *data, int data_count,
+ void **addr, void **stub_addr);
#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/include/stub-data.h b/arch/um/kernel/skas/include/stub-data.h
new file mode 100644
index 000000000000..f6ed92c3727d
--- /dev/null
+++ b/arch/um/kernel/skas/include/stub-data.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2005 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __STUB_DATA_H
+#define __STUB_DATA_H
+
+#include <sys/time.h>
+
+/* Parameter and result block read by stub_clone_handler. */
+struct stub_data {
+ /* file offset passed to the stub's mmap call */
+ long offset;
+ /* file descriptor passed to the stub's mmap call */
+ int fd;
+ /* value installed with setitimer(ITIMER_VIRTUAL, ...) */
+ struct itimerval timer;
+ /* result stored by the stub just before it traps */
+ long err;
+};
+
+#endif
diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h
index cd6c280482cb..6ee3f3902e68 100644
--- a/arch/um/kernel/skas/include/uaccess-skas.h
+++ b/arch/um/kernel/skas/include/uaccess-skas.h
@@ -18,18 +18,18 @@
((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
((unsigned long) (addr) + (size) >= (unsigned long)(addr))))
-static inline int verify_area_skas(int type, const void * addr,
+static inline int verify_area_skas(int type, const void __user * addr,
unsigned long size)
{
return(access_ok_skas(type, addr, size) ? 0 : -EFAULT);
}
-extern int copy_from_user_skas(void *to, const void *from, int n);
-extern int copy_to_user_skas(void *to, const void *from, int n);
-extern int strncpy_from_user_skas(char *dst, const char *src, int count);
-extern int __clear_user_skas(void *mem, int len);
-extern int clear_user_skas(void *mem, int len);
-extern int strnlen_user_skas(const void *str, int len);
+extern int copy_from_user_skas(void *to, const void __user *from, int n);
+extern int copy_to_user_skas(void __user *to, const void *from, int n);
+extern int strncpy_from_user_skas(char *dst, const char __user *src, int count);
+extern int __clear_user_skas(void __user *mem, int len);
+extern int clear_user_skas(void __user *mem, int len);
+extern int strnlen_user_skas(const void __user *str, int len);
#endif
diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c
index 438db2f43456..147466d7ff4f 100644
--- a/arch/um/kernel/skas/mem.c
+++ b/arch/um/kernel/skas/mem.c
@@ -5,7 +5,9 @@
#include "linux/config.h"
#include "linux/mm.h"
+#include "asm/pgtable.h"
#include "mem_user.h"
+#include "skas.h"
unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out,
unsigned long *task_size_out)
@@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out,
*task_size_out = CONFIG_HOST_TASK_SIZE;
#else
*host_size_out = top;
- *task_size_out = top;
+ if (proc_mm && ptrace_faultinfo)
+ *task_size_out = top;
+ else *task_size_out = CONFIG_STUB_START & PGDIR_MASK;
#endif
return(((unsigned long) set_task_sizes_skas) & ~0xffffff);
}
diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c
index 1310bf1e88d1..1d89640bd502 100644
--- a/arch/um/kernel/skas/mem_user.c
+++ b/arch/um/kernel/skas/mem_user.c
@@ -3,100 +3,279 @@
* Licensed under the GPL
*/
+#include <signal.h>
#include <errno.h>
+#include <string.h>
#include <sys/mman.h>
+#include <sys/wait.h>
+#include <asm/page.h>
+#include <asm/unistd.h>
#include "mem_user.h"
#include "mem.h"
+#include "skas.h"
#include "user.h"
#include "os.h"
#include "proc_mm.h"
+#include "ptrace_user.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "task.h"
+#include "registers.h"
+#include "uml-config.h"
+#include "sysdep/ptrace.h"
+#include "sysdep/stub.h"
-void map(int fd, unsigned long virt, unsigned long len, int r, int w,
- int x, int phys_fd, unsigned long long offset)
+extern unsigned long batch_syscall_stub, __syscall_stub_start;
+
+extern void wait_stub_done(int pid, int sig, char * fname);
+
+static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
+ unsigned long *stack)
{
- struct proc_mm_op map;
- int prot, n;
-
- prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
- (x ? PROT_EXEC : 0);
-
- map = ((struct proc_mm_op) { .op = MM_MMAP,
- .u =
- { .mmap =
- { .addr = virt,
- .len = len,
- .prot = prot,
- .flags = MAP_SHARED |
- MAP_FIXED,
- .fd = phys_fd,
- .offset = offset
- } } } );
- n = os_write_file(fd, &map, sizeof(map));
- if(n != sizeof(map))
- printk("map : /proc/mm map failed, err = %d\n", -n);
+ if(stack == NULL){
+ stack = (unsigned long *) mm_idp->stack + 2;
+ *stack = 0;
+ }
+ return stack;
}
-int unmap(int fd, void *addr, unsigned long len)
+extern int proc_mm;
+
+int single_count = 0;
+int multi_count = 0;
+int multi_op_count = 0;
+
+static long do_syscall_stub(struct mm_id *mm_idp, void **addr)
{
- struct proc_mm_op unmap;
- int n;
-
- unmap = ((struct proc_mm_op) { .op = MM_MUNMAP,
- .u =
- { .munmap =
- { .addr = (unsigned long) addr,
- .len = len } } } );
- n = os_write_file(fd, &unmap, sizeof(unmap));
- if(n != sizeof(unmap)) {
- if(n < 0)
- return(n);
- else if(n > 0)
- return(-EIO);
+ unsigned long regs[MAX_REG_NR];
+ unsigned long *data;
+ unsigned long *syscall;
+ long ret, offset;
+ int n, pid = mm_idp->u.pid;
+
+ if(proc_mm)
+#warning Need to look up userspace_pid by cpu
+ pid = userspace_pid[0];
+
+ multi_count++;
+
+ get_safe_registers(regs);
+ regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
+ ((unsigned long) &batch_syscall_stub -
+ (unsigned long) &__syscall_stub_start);
+ n = ptrace_setregs(pid, regs);
+ if(n < 0)
+ panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
+ n);
+
+ wait_stub_done(pid, 0, "do_syscall_stub");
+
+ /* When the stub stops, we find the following values on the
+ * beginning of the stack:
+ * (long )return_value
+ * (long )offset to failed syscall-data (0, if no error)
+ */
+ ret = *((unsigned long *) mm_idp->stack);
+ offset = *((unsigned long *) mm_idp->stack + 1);
+ if (offset) {
+ data = (unsigned long *)(mm_idp->stack +
+ offset - UML_CONFIG_STUB_DATA);
+ syscall = (unsigned long *)((unsigned long)data + data[0]);
+ printk("do_syscall_stub: syscall %ld failed, return value = "
+ "0x%lx, expected return value = 0x%lx\n",
+ syscall[0], ret, syscall[7]);
+ printk(" syscall parameters: "
+ "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+ syscall[1], syscall[2], syscall[3],
+ syscall[4], syscall[5], syscall[6]);
+ for(n = 1; n < data[0]/sizeof(long); n++) {
+ if(n == 1)
+ printk(" additional syscall data:");
+ if(n % 4 == 1)
+ printk("\n ");
+ printk(" 0x%lx", data[n]);
+ }
+ if(n > 1)
+ printk("\n");
}
+ else ret = 0;
+
+ *addr = check_init_stack(mm_idp, NULL);
- return(0);
+ return ret;
}
-int protect(int fd, unsigned long addr, unsigned long len, int r, int w,
- int x, int must_succeed)
+long run_syscall_stub(struct mm_id * mm_idp, int syscall,
+ unsigned long *args, long expected, void **addr,
+ int done)
{
- struct proc_mm_op protect;
- int prot, n;
+ unsigned long *stack = check_init_stack(mm_idp, *addr);
- prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
- (x ? PROT_EXEC : 0);
+ if(done && *addr == NULL)
+ single_count++;
- protect = ((struct proc_mm_op) { .op = MM_MPROTECT,
- .u =
- { .mprotect =
- { .addr = (unsigned long) addr,
- .len = len,
- .prot = prot } } } );
+ *stack += sizeof(long);
+ stack += *stack / sizeof(long);
- n = os_write_file(fd, &protect, sizeof(protect));
- if(n != sizeof(protect)) {
- if(n == 0) return(0);
+ *stack++ = syscall;
+ *stack++ = args[0];
+ *stack++ = args[1];
+ *stack++ = args[2];
+ *stack++ = args[3];
+ *stack++ = args[4];
+ *stack++ = args[5];
+ *stack++ = expected;
+ *stack = 0;
+ multi_op_count++;
- if(must_succeed)
- panic("protect failed, err = %d", -n);
+ if(!done && ((((unsigned long) stack) & ~PAGE_MASK) <
+ PAGE_SIZE - 10 * sizeof(long))){
+ *addr = stack;
+ return 0;
+ }
- return(-EIO);
+ return do_syscall_stub(mm_idp, addr);
+}
+
+long syscall_stub_data(struct mm_id * mm_idp,
+ unsigned long *data, int data_count,
+ void **addr, void **stub_addr)
+{
+ unsigned long *stack;
+ int ret = 0;
+
+ /* If *addr still is uninitialized, it *must* contain NULL.
+ * Thus in this case do_syscall_stub correctly won't be called.
+ */
+ if((((unsigned long) *addr) & ~PAGE_MASK) >=
+ PAGE_SIZE - (10 + data_count) * sizeof(long)) {
+ ret = do_syscall_stub(mm_idp, addr);
+ /* in case of error, don't overwrite data on stack */
+ if(ret)
+ return ret;
}
- return(0);
+ stack = check_init_stack(mm_idp, *addr);
+ *addr = stack;
+
+ *stack = data_count * sizeof(long);
+
+ memcpy(stack + 1, data, data_count * sizeof(long));
+
+ *stub_addr = (void *)(((unsigned long)(stack + 1) & ~PAGE_MASK) +
+ UML_CONFIG_STUB_DATA);
+
+ return 0;
}
-void before_mem_skas(unsigned long unused)
+/*
+ * Map 'len' bytes of 'phys_fd', starting at 'offset', at address 'virt'
+ * in the address space named by mm_idp, MAP_SHARED | MAP_FIXED, with the
+ * protection selected by r, w and x.
+ *
+ * With proc_mm set the request is written to mm_idp->u.mm_fd as a
+ * struct proc_mm_op; otherwise it is handed to run_syscall_stub() together
+ * with 'data' and 'done'.
+ *
+ * Returns 0 when the proc_mm write transfers the whole request, the
+ * unmodified os_write_file() result when it does not, or whatever
+ * run_syscall_stub() returns on the stub path.
+ */
+int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len,
+ int r, int w, int x, int phys_fd, unsigned long long offset,
+ int done, void **data)
+{
+ int prot, ret;
+
+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
+ (x ? PROT_EXEC : 0);
+
+ if(proc_mm){
+ struct proc_mm_op map;
+ int fd = mm_idp->u.mm_fd;
+
+ map = ((struct proc_mm_op) { .op = MM_MMAP,
+ .u =
+ { .mmap =
+ { .addr = virt,
+ .len = len,
+ .prot = prot,
+ .flags = MAP_SHARED |
+ MAP_FIXED,
+ .fd = phys_fd,
+ .offset= offset
+ } } } );
+ ret = os_write_file(fd, &map, sizeof(map));
+ if(ret != sizeof(map))
+ printk("map : /proc/mm map failed, err = %d\n", -ret);
+ else ret = 0;
+ }
+ else {
+ /* mmap arguments in syscall order; 'virt' is also passed as the
+  * expected return value of the stubbed call
+  */
+ unsigned long args[] = { virt, len, prot,
+ MAP_SHARED | MAP_FIXED, phys_fd,
+ MMAP_OFFSET(offset) };
+
+ ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt,
+ data, done);
+ }
+
+ return ret;
+}
+
+int unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done,
+ void **data)
{
+ int ret;
+
+ if(proc_mm){
+ struct proc_mm_op unmap;
+ int fd = mm_idp->u.mm_fd;
+
+ unmap = ((struct proc_mm_op) { .op = MM_MUNMAP,
+ .u =
+ { .munmap =
+ { .addr =
+ (unsigned long) addr,
+ .len = len } } } );
+ ret = os_write_file(fd, &unmap, sizeof(unmap));
+ if(ret != sizeof(unmap))
+ printk("unmap - proc_mm write returned %d\n", ret);
+ else ret = 0;
+ }
+ else {
+ unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
+ 0 };
+
+ ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0,
+ data, done);
+ if(ret < 0)
+ printk("munmap stub failed, errno = %d\n", ret);
+ }
+
+ return ret;
}
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+/*
+ * Change the protection of 'len' bytes at 'addr' in the address space
+ * named by mm_idp to the combination selected by r, w and x.
+ *
+ * With proc_mm set the request is written to mm_idp->u.mm_fd as a
+ * struct proc_mm_op; otherwise an mprotect call is handed to
+ * run_syscall_stub() together with 'data' and 'done'.
+ *
+ * Returns 0 when the proc_mm write transfers the whole request, the
+ * unmodified os_write_file() result when it does not, or whatever
+ * run_syscall_stub() returns on the stub path.
+ */
+int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
+	    int r, int w, int x, int done, void **data)
+{
+	struct proc_mm_op protect;
+	int prot, ret;
+
+	prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
+		(x ? PROT_EXEC : 0);
+
+	if(proc_mm){
+		int fd = mm_idp->u.mm_fd;
+		protect = ((struct proc_mm_op) { .op	= MM_MPROTECT,
+						 .u	=
+						 { .mprotect =
+						   { .addr =
+						     (unsigned long) addr,
+						     .len  = len,
+						     .prot = prot } } } );
+
+		ret = os_write_file(fd, &protect, sizeof(protect));
+		if(ret != sizeof(protect))
+			/* newline added so the message ends its own log line */
+			printk("protect failed, err = %d\n", -ret);
+		else ret = 0;
+	}
+	else {
+		/* mprotect arguments in syscall order; 0 is the expected result */
+		unsigned long args[] = { addr, len, prot, 0, 0, 0 };
+
+		ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
+				       data, done);
+	}
+
+	return ret;
+}
+
+/* Intentionally empty: the argument is ignored and nothing is done. */
+void before_mem_skas(unsigned long unused)
+{
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 6cb9a6d028a9..240143b616a2 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -3,46 +3,154 @@
* Licensed under the GPL
*/
+#include "linux/config.h"
#include "linux/sched.h"
#include "linux/list.h"
#include "linux/spinlock.h"
#include "linux/slab.h"
+#include "linux/errno.h"
+#include "linux/mm.h"
#include "asm/current.h"
#include "asm/segment.h"
#include "asm/mmu.h"
+#include "asm/pgalloc.h"
+#include "asm/pgtable.h"
#include "os.h"
#include "skas.h"
+extern int __syscall_stub_start;
+
+/*
+ * Install a page table entry in mm that maps the user address proc onto the
+ * page backing the kernel address kernel.  The entry is created present,
+ * marked executable and write-protected.  Missing pud/pmd/pte levels are
+ * allocated on the way down, all under mm->page_table_lock.
+ *
+ * The pmd's page table (and, with CONFIG_3_LEVEL_PGTABLES, the pmd itself) is
+ * remembered in mm->context.skas so that it can be released later.
+ *
+ * Returns 0 on success, -ENOMEM if any level could not be allocated.
+ */
+static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
+			 unsigned long kernel)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	spin_lock(&mm->page_table_lock);
+	pgd = pgd_offset(mm, proc);
+	pud = pud_alloc(mm, pgd, proc);
+	if (!pud)
+		goto out;
+
+	pmd = pmd_alloc(mm, pud, proc);
+	if (!pmd)
+		goto out_pmd;
+
+	pte = pte_alloc_map(mm, pmd, proc);
+	if (!pte)
+		goto out_pte;
+
+	/* There's an interaction between the skas0 stub pages, stack
+	 * randomization, and the BUG at the end of exit_mmap.  exit_mmap
+	 * checks that the number of page tables freed is the same as had
+	 * been allocated.  If the stack is on the last page table page,
+	 * then the stack pte page will be freed, and if not, it won't.  To
+	 * avoid having to know where the stack is, or if the process mapped
+	 * something at the top of its address space for some other reason,
+	 * we set TASK_SIZE to end at the start of the last page table.
+	 * This keeps exit_mmap off the last page, but introduces a leak
+	 * of that page.  So, we hang onto it here and free it in
+	 * destroy_context_skas.
+	 */
+
+	mm->context.skas.last_page_table = pmd_page_kernel(*pmd);
+#ifdef CONFIG_3_LEVEL_PGTABLES
+	mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
+#endif
+
+	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
+	*pte = pte_mkexec(*pte);
+	*pte = pte_wrprotect(*pte);
+	spin_unlock(&mm->page_table_lock);
+	return(0);
+
+	/* Unwind in reverse order of allocation: a failed pte allocation
+	 * releases the pmd and then the pud, a failed pmd allocation
+	 * releases only the pud.
+	 */
+ out_pte:
+	pmd_free(pmd);
+ out_pmd:
+	pud_free(pud);
+ out:
+	spin_unlock(&mm->page_table_lock);
+	return(-ENOMEM);
+}
+
+/*
+ * Set up the skas context of a new mm.
+ *
+ * Unless both proc_mm and ptrace_faultinfo are available, a zeroed page is
+ * allocated as the stub stack and ptes for the stub code and stub data pages
+ * are installed with init_stub_pte().  Then either a new /proc/mm descriptor
+ * is obtained with new_mm() (proc_mm), or a host process is created with
+ * copy_context_skas0() / start_userspace() and its pid recorded.
+ *
+ * Returns 0 on success; on failure returns -ENOMEM, the init_stub_pte()
+ * error or the new_mm() error, after releasing the stub stack page.
+ */
int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
{
- int from;
+ struct mm_struct *cur_mm = current->mm;
+ struct mm_id *cur_mm_id = &cur_mm->context.skas.id;
+ struct mm_id *mm_id = &mm->context.skas.id;
+ unsigned long stack = 0;
+ int from, ret = -ENOMEM;
+
+ if(!proc_mm || !ptrace_faultinfo){
+ stack = get_zeroed_page(GFP_KERNEL);
+ if(stack == 0)
+ goto out;
- if((current->mm != NULL) && (current->mm != &init_mm))
- from = current->mm->context.skas.mm_fd;
- else from = -1;
+ /* This zeros the entry that pgd_alloc didn't, needed since
+ * we are about to reinitialize it, and want mm.nr_ptes to
+ * be accurate.
+ */
+ mm->pgd[USER_PTRS_PER_PGD] = __pgd(0);
- mm->context.skas.mm_fd = new_mm(from);
- if(mm->context.skas.mm_fd < 0){
- printk("init_new_context_skas - new_mm failed, errno = %d\n",
- mm->context.skas.mm_fd);
- return(mm->context.skas.mm_fd);
+ ret = init_stub_pte(mm, CONFIG_STUB_CODE,
+ (unsigned long) &__syscall_stub_start);
+ if(ret)
+ goto out_free;
+
+ ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack);
+ if(ret)
+ goto out_free;
+
+ mm->nr_ptes--;
}
+ mm_id->stack = stack;
- return(0);
+ if(proc_mm){
+ if((cur_mm != NULL) && (cur_mm != &init_mm))
+ from = cur_mm_id->u.mm_fd;
+ else from = -1;
+
+ ret = new_mm(from, stack);
+ if(ret < 0){
+ printk("init_new_context_skas - new_mm failed, "
+ "errno = %d\n", ret);
+ goto out_free;
+ }
+ mm_id->u.mm_fd = ret;
+ }
+ else {
+ if((cur_mm != NULL) && (cur_mm != &init_mm))
+ mm_id->u.pid = copy_context_skas0(stack,
+ cur_mm_id->u.pid);
+ else mm_id->u.pid = start_userspace(stack);
+ }
+
+ return 0;
+
+ out_free:
+ /* Free the page through the local variable: the init_stub_pte()
+ * failure paths get here before mm_id->stack has been assigned.
+ */
+ if(stack != 0)
+ free_page(stack);
+ out:
+ return ret;
}
+/*
+ * Release what the skas context of mm holds: the /proc/mm descriptor when
+ * proc_mm is set, otherwise the ptraced host process recorded in the mm_id.
+ * When stub pages are in use (proc_mm or ptrace_faultinfo missing), the stub
+ * stack page and the page table saved in last_page_table (plus last_pmd with
+ * CONFIG_3_LEVEL_PGTABLES) are freed as well.
+ */
void destroy_context_skas(struct mm_struct *mm)
{
- os_close_file(mm->context.skas.mm_fd);
-}
+ struct mmu_context_skas *mmu = &mm->context.skas;
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+ if(proc_mm)
+ os_close_file(mmu->id.u.mm_fd);
+ else
+ os_kill_ptraced_process(mmu->id.u.pid, 1);
+
+ /* Same condition under which init_new_context_skas set these up */
+ if(!proc_mm || !ptrace_faultinfo){
+ free_page(mmu->id.stack);
+ pte_free_kernel((pte_t *) mmu->last_page_table);
+ dec_page_state(nr_page_table_pages);
+#ifdef CONFIG_3_LEVEL_PGTABLES
+ pmd_free((pmd_t *) mmu->last_pmd);
+#endif
+ }
+}
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 773cd2b525fc..5cd0e9929789 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com)
* Licensed under the GPL
*/
@@ -13,7 +13,9 @@
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/user.h>
+#include <sys/time.h>
#include <asm/unistd.h>
+#include <asm/types.h>
#include "user.h"
#include "ptrace_user.h"
#include "time_user.h"
@@ -21,13 +23,18 @@
#include "user_util.h"
#include "kern_util.h"
#include "skas.h"
+#include "stub-data.h"
+#include "mm_id.h"
#include "sysdep/sigcontext.h"
+#include "sysdep/stub.h"
#include "os.h"
#include "proc_mm.h"
#include "skas_ptrace.h"
#include "chan_user.h"
#include "signal_user.h"
#include "registers.h"
+#include "mem.h"
+#include "uml-config.h"
#include "process.h"
int is_skas_winch(int pid, int fd, void *data)
@@ -39,20 +46,59 @@ int is_skas_winch(int pid, int fd, void *data)
return(1);
}
-void get_skas_faultinfo(int pid, struct faultinfo * fi)
+/*
+ * Let the traced process pid run and wait until it stops.
+ *
+ * Unless sig is -1, the process is first continued with PTRACE_CONT,
+ * delivering sig.  Stops caused by SIGVTALRM or SIGWINCH are skipped: the
+ * process is continued again (with signal 0) and waited for once more.
+ *
+ * Panics, prefixing the message with fname, if the continue fails, if
+ * waitpid fails, or if the final stop is not due to SIGUSR1 or SIGTRAP.
+ */
+void wait_stub_done(int pid, int sig, char * fname)
{
- int err;
-
- err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
- if(err)
- panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, "
- "errno = %d\n", errno);
+ int n, status, err;
+
+ do {
+ if ( sig != -1 ) {
+ err = ptrace(PTRACE_CONT, pid, 0, sig);
+ if(err)
+ panic("%s : continue failed, errno = %d\n",
+ fname, errno);
+ }
+ /* Any further pass through the loop continues without a signal */
+ sig = 0;
+
+ CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
+ } while((n >= 0) && WIFSTOPPED(status) &&
+ ((WSTOPSIG(status) == SIGVTALRM) ||
+ /* running UML inside a detached screen can cause
+ * SIGWINCHes
+ */
+ (WSTOPSIG(status) == SIGWINCH)));
+
+ if((n < 0) || !WIFSTOPPED(status) ||
+ (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status) != SIGTRAP)){
+ panic("%s : failed to wait for SIGUSR1/SIGTRAP, "
+ "pid = %d, n = %d, errno = %d, status = 0x%x\n",
+ fname, pid, n, errno, status);
+ }
+}
- /* Special handling for i386, which has different structs */
- if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo))
- memset((char *)fi + sizeof(struct ptrace_faultinfo), 0,
- sizeof(struct faultinfo) -
- sizeof(struct ptrace_faultinfo));
+/*
+ * Fill *fi with the fault information of the traced process pid.
+ *
+ * With ptrace_faultinfo the data comes from PTRACE_FAULTINFO (panicking if
+ * that fails).  Without it, the process is continued with SIGSEGV via
+ * wait_stub_done() and the data is copied from the start of the current
+ * stub stack page.
+ */
+void get_skas_faultinfo(int pid, struct faultinfo * fi)
+{
+ int err;
+
+ if(ptrace_faultinfo){
+ err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
+ if(err)
+ panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, "
+ "errno = %d\n", errno);
+
+ /* Special handling for i386, which has different structs */
+ /* Zero the part of *fi that lies beyond struct ptrace_faultinfo */
+ if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo))
+ memset((char *)fi + sizeof(struct ptrace_faultinfo), 0,
+ sizeof(struct faultinfo) -
+ sizeof(struct ptrace_faultinfo));
+ }
+ else {
+ wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo");
+
+ /* faultinfo is prepared by the stub-segv-handler at start of
+ * the stub stack page. We just have to copy it.
+ */
+ memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
+ }
}
static void handle_segv(int pid, union uml_pt_regs * regs)
@@ -91,11 +137,58 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu
handle_syscall(regs);
}
-static int userspace_tramp(void *arg)
+extern int __syscall_stub_start;
+int stub_code_fd = -1;
+__u64 stub_code_offset;
+
+/*
+ * Entry point of the process cloned by start_userspace(); stack is the
+ * stub stack address passed as the clone argument (may be NULL).
+ *
+ * Requests tracing, sets up signals and the timer, and then:
+ *  - without proc_mm, maps the stub code page at UML_CONFIG_STUB_CODE and,
+ *    if stack is non-NULL, the stub stack at UML_CONFIG_STUB_DATA;
+ *  - without ptrace_faultinfo, installs the stub SIGSEGV handler, with the
+ *    stub data page given to set_sigstack() and SA_ONSTACK set.
+ * Finally stops itself.  Exits with status 1 if a mapping fails.
+ */
+static int userspace_tramp(void *stack)
{
- init_new_thread_signals(0);
- enable_timer();
+ void *addr;
+
ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+ init_new_thread_signals(1);
+ enable_timer();
+
+ if(!proc_mm){
+ /* This has a pte, but it can't be mapped in with the usual
+ * tlb_flush mechanism because this is part of that mechanism
+ */
+ addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(),
+ PROT_EXEC, MAP_FIXED | MAP_PRIVATE,
+ stub_code_fd, stub_code_offset);
+ if(addr == MAP_FAILED){
+ printk("mapping stub code failed, errno = %d\n",
+ errno);
+ exit(1);
+ }
+
+ if(stack != NULL){
+ int fd;
+ __u64 offset;
+
+ /* Look up the fd/offset backing the stub stack page */
+ fd = phys_mapping(to_phys(stack), &offset);
+ addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(),
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, fd, offset);
+ if(addr == MAP_FAILED){
+ printk("mapping stub stack failed, "
+ "errno = %d\n", errno);
+ exit(1);
+ }
+ }
+ }
+ if(!ptrace_faultinfo){
+ /* Address of stub_segv_handler as seen through the stub code mapping */
+ unsigned long v = UML_CONFIG_STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) &__syscall_stub_start;
+
+ set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size());
+ set_handler(SIGSEGV, (void *) v, SA_ONSTACK,
+ SIGIO, SIGWINCH, SIGALRM, SIGVTALRM,
+ SIGUSR1, -1);
+ }
+
os_stop_process(os_getpid());
return(0);
}
@@ -105,11 +198,15 @@ static int userspace_tramp(void *arg)
#define NR_CPUS 1
int userspace_pid[NR_CPUS];
-void start_userspace(int cpu)
+int start_userspace(unsigned long stub_stack)
{
void *stack;
unsigned long sp;
- int pid, status, n;
+ int pid, status, n, flags;
+
+ if ( stub_code_fd == -1 )
+ stub_code_fd = phys_mapping(to_phys(&__syscall_stub_start),
+ &stub_code_offset);
stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -117,8 +214,9 @@ void start_userspace(int cpu)
panic("start_userspace : mmap failed, errno = %d", errno);
sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
- pid = clone(userspace_tramp, (void *) sp,
- CLONE_FILES | CLONE_VM | SIGCHLD, NULL);
+ flags = CLONE_FILES | SIGCHLD;
+ if(proc_mm) flags |= CLONE_VM;
+ pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
if(pid < 0)
panic("start_userspace : clone failed, errno = %d", errno);
@@ -140,7 +238,7 @@ void start_userspace(int cpu)
if(munmap(stack, PAGE_SIZE) < 0)
panic("start_userspace : munmap failed, errno = %d\n", errno);
- userspace_pid[cpu] = pid;
+ return(pid);
}
void userspace(union uml_pt_regs *regs)
@@ -174,7 +272,9 @@ void userspace(union uml_pt_regs *regs)
if(WIFSTOPPED(status)){
switch(WSTOPSIG(status)){
case SIGSEGV:
- handle_segv(pid, regs);
+ if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo)
+ user_signal(SIGSEGV, regs, pid);
+ else handle_segv(pid, regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs, local_using_sysemu);
@@ -194,6 +294,7 @@ void userspace(union uml_pt_regs *regs)
printk("userspace - child stopped with signal "
"%d\n", WSTOPSIG(status));
}
+ pid = userspace_pid[0];
interrupt_end();
/* Avoid -ERESTARTSYS handling in host */
@@ -207,6 +308,114 @@ void userspace(union uml_pt_regs *regs)
#define INIT_JMP_HALT 3
#define INIT_JMP_REBOOT 4
+
+/*
+ * Create a new host process from the existing one, pid, by making pid run
+ * the clone stub.  new_stack is the stub stack page for the new process.
+ *
+ * The fd/offset of new_stack and the timer interval are placed in the
+ * current stub stack page, pid's registers are pointed at
+ * stub_clone_handler, and both parent and child are waited for with
+ * wait_stub_done().
+ *
+ * Returns the pid reported by the stub; panics on any failure.
+ */
+int copy_context_skas0(unsigned long new_stack, int pid)
+{
+ int err;
+ unsigned long regs[MAX_REG_NR];
+ unsigned long current_stack = current_stub_stack();
+ struct stub_data *data = (struct stub_data *) current_stack;
+ struct stub_data *child_data = (struct stub_data *) new_stack;
+ __u64 new_offset;
+ int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
+
+ /* prepare offset and fd of child's stack as argument for parent's
+ * and child's mmap2 calls
+ */
+ *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset),
+ .fd = new_fd,
+ .timer = ((struct itimerval)
+ { { 0, 1000000 / hz() },
+ { 0, 1000000 / hz() }})});
+ get_safe_registers(regs);
+
+ /* Set parent's instruction pointer to start of clone-stub */
+ regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
+ (unsigned long) stub_clone_handler -
+ (unsigned long) &__syscall_stub_start;
+ regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE -
+ sizeof(void *);
+ err = ptrace_setregs(pid, regs);
+ if(err < 0)
+ panic("copy_context_skas0 : PTRACE_SETREGS failed, "
+ "pid = %d, errno = %d\n", pid, errno);
+
+ /* set a well known return code for detection of child write failure */
+ child_data->err = 12345678;
+
+ /* Wait, until parent has finished its work: read child's pid from
+ * parent's stack, and check, if bad result.
+ */
+ wait_stub_done(pid, 0, "copy_context_skas0");
+
+ /* From here on pid names the new process, not the one passed in */
+ pid = data->err;
+ if(pid < 0)
+ panic("copy_context_skas0 - stub-parent reports error %d\n",
+ pid);
+
+ /* Wait, until child has finished too: read child's result from
+ * child's stack and check it.
+ */
+ wait_stub_done(pid, -1, "copy_context_skas0");
+ if (child_data->err != UML_CONFIG_STUB_DATA)
+ panic("copy_context_skas0 - stub-child reports error %d\n",
+ child_data->err);
+
+ if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+ (void *)PTRACE_O_TRACESYSGOOD) < 0)
+ panic("copy_context_skas0 : PTRACE_SETOPTIONS failed, "
+ "errno = %d\n", errno);
+
+ return pid;
+}
+
+/*
+ * This is used only if proc_mm is available while PTRACE_FAULTINFO
+ * isn't.  Opening /proc/mm creates a new mm_context, which lacks the
+ * stub pages.  Thus, we map them using the /proc/mm fd.
+ *
+ * fd is the /proc/mm descriptor; code and data are the target addresses
+ * of the stub code and stub data pages.  The data page is mapped only when
+ * stack is nonzero, and is backed by the page at kernel address stack.
+ * Panics if either MM_MMAP write is short or fails.
+ */
+void map_stub_pages(int fd, unsigned long code,
+ unsigned long data, unsigned long stack)
+{
+ struct proc_mm_op mmop;
+ int n;
+
+ mmop = ((struct proc_mm_op) { .op = MM_MMAP,
+ .u =
+ { .mmap =
+ { .addr = code,
+ .len = PAGE_SIZE,
+ .prot = PROT_EXEC,
+ .flags = MAP_FIXED | MAP_PRIVATE,
+ .fd = stub_code_fd,
+ .offset = stub_code_offset
+ } } });
+ n = os_write_file(fd, &mmop, sizeof(mmop));
+ if(n != sizeof(mmop))
+ panic("map_stub_pages : /proc/mm map for code failed, "
+ "err = %d\n", -n);
+
+ if ( stack ) {
+ __u64 map_offset;
+ /* Look up the fd/offset backing the stub stack page */
+ int map_fd = phys_mapping(to_phys((void *)stack), &map_offset);
+ mmop = ((struct proc_mm_op)
+ { .op = MM_MMAP,
+ .u =
+ { .mmap =
+ { .addr = data,
+ .len = PAGE_SIZE,
+ .prot = PROT_READ | PROT_WRITE,
+ .flags = MAP_FIXED | MAP_SHARED,
+ .fd = map_fd,
+ .offset = map_offset
+ } } });
+ n = os_write_file(fd, &mmop, sizeof(mmop));
+ if(n != sizeof(mmop))
+ panic("map_stub_pages : /proc/mm map for data failed, "
+ "err = %d\n", -n);
+ }
+}
+
void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr,
void (*handler)(int))
{
@@ -334,21 +543,19 @@ void reboot_skas(void)
siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT);
}
-void switch_mm_skas(int mm_fd)
+/*
+ * Switch userspace to the address space named by mm_idp.  With proc_mm the
+ * host process userspace_pid[0] is moved onto mm_idp->u.mm_fd with
+ * PTRACE_SWITCH_MM (panicking on failure); without it, userspace_pid[0] is
+ * simply replaced by the pid stored in mm_idp.
+ */
+void switch_mm_skas(struct mm_id *mm_idp)
{
int err;
#warning need cpu pid in switch_mm_skas
- err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd);
- if(err)
- panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n",
- errno);
-}
-
-void kill_off_processes_skas(void)
-{
-#warning need to loop over userspace_pids in kill_off_processes_skas
- os_kill_ptraced_process(userspace_pid[0], 1);
+ if(proc_mm){
+ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
+ mm_idp->u.mm_fd);
+ if(err)
+ panic("switch_mm_skas - PTRACE_SWITCH_MM failed, "
+ "errno = %d\n", errno);
+ }
+ else userspace_pid[0] = mm_idp->u.pid;
}
/*
diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
index 0a7b8aa55db8..3d1b227226e6 100644
--- a/arch/um/kernel/skas/process_kern.c
+++ b/arch/um/kernel/skas/process_kern.c
@@ -129,7 +129,9 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp,
return(0);
}
-int new_mm(int from)
+extern void map_stub_pages(int fd, unsigned long code,
+ unsigned long data, unsigned long stack);
+int new_mm(int from, unsigned long stack)
{
struct proc_mm_op copy;
int n, fd;
@@ -148,6 +150,9 @@ int new_mm(int from)
"err = %d\n", -n);
}
+ if(!ptrace_faultinfo)
+ map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack);
+
return(fd);
}
@@ -175,9 +180,12 @@ static int start_kernel_proc(void *unused)
return(0);
}
+extern int userspace_pid[];
+
int start_uml_skas(void)
{
- start_userspace(0);
+ if(proc_mm)
+ userspace_pid[0] = start_userspace(0);
init_new_thread_signals(1);
@@ -199,3 +207,31 @@ int thread_pid_skas(struct task_struct *task)
#warning Need to look up userspace_pid by cpu
return(userspace_pid[0]);
}
+
+/*
+ * Kill the host processes that run UML userspace.  With proc_mm that is the
+ * single process in userspace_pid[0]; without it, the process recorded in
+ * the mm_id of every task that has an mm.
+ */
+void kill_off_processes_skas(void)
+{
+	if(proc_mm)
+#warning need to loop over userspace_pids in kill_off_processes_skas
+		os_kill_ptraced_process(userspace_pid[0], 1);
+	else {
+		struct task_struct *p;
+		int pid;
+
+		for_each_process(p){
+			/* No mm means there is no mm_id to take a pid from */
+			if(p->mm == NULL)
+				continue;
+
+			pid = p->mm->context.skas.id.u.pid;
+			os_kill_ptraced_process(pid, 1);
+		}
+	}
+}
+
+/*
+ * Address of the stub stack page recorded for the current task's mm, or 0
+ * when the current task has no mm.
+ */
+unsigned long current_stub_stack(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	return (mm != NULL) ? mm->context.skas.id.stack : 0;
+}
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
new file mode 100644
index 000000000000..51fb94076fcf
--- /dev/null
+++ b/arch/um/kernel/skas/syscall.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sys.h"
+#include "linux/ptrace.h"
+#include "asm/errno.h"
+#include "asm/unistd.h"
+#include "asm/ptrace.h"
+#include "asm/current.h"
+#include "sysdep/syscalls.h"
+#include "kern_util.h"
+#include "syscall.h"
+
+/*
+ * Run the system call described by the register set r.
+ *
+ * The call is traced on entry and exit, counted both per thread and
+ * globally, and its number is range-checked against NR_syscalls: out of
+ * range numbers yield -ENOSYS, everything else goes through
+ * EXECUTE_SYSCALL.  The result is stored back into r->skas.regs.
+ * With UML_CONFIG_SYSCALL_DEBUG the call is also recorded via
+ * record_syscall_start()/record_syscall_end().
+ */
+void handle_syscall(union uml_pt_regs *r)
+{
+ /* r is embedded in a struct pt_regs; recover the enclosing structure */
+ struct pt_regs *regs = container_of(r, struct pt_regs, regs);
+ long result;
+ int syscall;
+#ifdef UML_CONFIG_SYSCALL_DEBUG
+ int index;
+
+ index = record_syscall_start(UPT_SYSCALL_NR(r));
+#endif
+ syscall_trace(r, 0);
+
+ current->thread.nsyscalls++;
+ nsyscalls++;
+
+ /* This should go in the declaration of syscall, but when I do that,
+ * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
+ * children at all, sometimes hanging when bash doesn't see the first
+ * ls exit.
+ * The assembly looks functionally the same to me. This is
+ * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5)
+ * in case it's a compiler bug.
+ */
+ syscall = UPT_SYSCALL_NR(r);
+ if((syscall >= NR_syscalls) || (syscall < 0))
+ result = -ENOSYS;
+ else result = EXECUTE_SYSCALL(syscall, regs);
+
+ REGS_SET_SYSCALL_RETURN(r->skas.regs, result);
+
+ syscall_trace(r, 1);
+#ifdef UML_CONFIG_SYSCALL_DEBUG
+ record_syscall_end(index, result);
+#endif
+}
diff --git a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c
deleted file mode 100644
index bdf040ce5b8e..000000000000
--- a/arch/um/kernel/skas/syscall_kern.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include "linux/sys.h"
-#include "linux/ptrace.h"
-#include "asm/errno.h"
-#include "asm/unistd.h"
-#include "asm/ptrace.h"
-#include "asm/current.h"
-#include "sysdep/syscalls.h"
-#include "kern_util.h"
-
-extern syscall_handler_t *sys_call_table[];
-
-long execute_syscall_skas(void *r)
-{
- struct pt_regs *regs = r;
- long res;
- int syscall;
-
- current->thread.nsyscalls++;
- nsyscalls++;
- syscall = UPT_SYSCALL_NR(&regs->regs);
-
- if((syscall >= NR_syscalls) || (syscall < 0))
- res = -ENOSYS;
- else res = EXECUTE_SYSCALL(syscall, regs);
-
- return(res);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/syscall_user.c b/arch/um/kernel/skas/syscall_user.c
deleted file mode 100644
index 2828e6e37721..000000000000
--- a/arch/um/kernel/skas/syscall_user.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <signal.h>
-#include "kern_util.h"
-#include "uml-config.h"
-#include "syscall_user.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-#include "skas.h"
-
-void handle_syscall(union uml_pt_regs *regs)
-{
- long result;
-#if UML_CONFIG_SYSCALL_DEBUG
- int index;
-
- index = record_syscall_start(UPT_SYSCALL_NR(regs));
-#endif
-
- syscall_trace(regs, 0);
- result = execute_syscall_skas(regs);
-
- REGS_SET_SYSCALL_RETURN(regs->skas.regs, result);
-
- syscall_trace(regs, 1);
-#if UML_CONFIG_SYSCALL_DEBUG
- record_syscall_end(index, result);
-#endif
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c
index b8c5e71763d1..6e84963dfc29 100644
--- a/arch/um/kernel/skas/tlb.c
+++ b/arch/um/kernel/skas/tlb.c
@@ -6,6 +6,7 @@
#include "linux/stddef.h"
#include "linux/sched.h"
+#include "linux/config.h"
#include "linux/mm.h"
#include "asm/page.h"
#include "asm/pgtable.h"
@@ -17,41 +18,50 @@
#include "os.h"
#include "tlb.h"
-static void do_ops(int fd, struct host_vm_op *ops, int last)
+/*
+ * Apply the queued operations ops[0..last] to the address space described
+ * by mmu, dispatching each one to map(), unmap() or protect().  finished
+ * and flush are forwarded unchanged to every call.
+ * Processing stops at the first operation that returns nonzero, and that
+ * value is returned; 0 means every operation succeeded.  An unknown op type
+ * is logged and skipped.
+ */
+static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
+ int finished, void **flush)
{
struct host_vm_op *op;
- int i;
+ int i, ret = 0;
- for(i = 0; i <= last; i++){
+ for(i = 0; i <= last && !ret; i++){
op = &ops[i];
switch(op->type){
case MMAP:
- map(fd, op->u.mmap.addr, op->u.mmap.len,
- op->u.mmap.r, op->u.mmap.w, op->u.mmap.x,
- op->u.mmap.fd, op->u.mmap.offset);
+ ret = map(&mmu->skas.id, op->u.mmap.addr,
+ op->u.mmap.len, op->u.mmap.r, op->u.mmap.w,
+ op->u.mmap.x, op->u.mmap.fd,
+ op->u.mmap.offset, finished, flush);
break;
case MUNMAP:
- unmap(fd, (void *) op->u.munmap.addr,
- op->u.munmap.len);
+ ret = unmap(&mmu->skas.id,
+ (void *) op->u.munmap.addr,
+ op->u.munmap.len, finished, flush);
break;
case MPROTECT:
- protect(fd, op->u.mprotect.addr, op->u.mprotect.len,
- op->u.mprotect.r, op->u.mprotect.w,
- op->u.mprotect.x);
+ ret = protect(&mmu->skas.id, op->u.mprotect.addr,
+ op->u.mprotect.len, op->u.mprotect.r,
+ op->u.mprotect.w, op->u.mprotect.x,
+ finished, flush);
break;
default:
printk("Unknown op type %d in do_ops\n", op->type);
break;
}
}
+
+ return ret;
}
+extern int proc_mm;
+
+/*
+ * Bring the host mappings of [start_addr, end_addr) in mm up to date by
+ * handing the range to fix_range_common() with do_ops as the backend.
+ */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
- int fd = mm->context.skas.mm_fd;
+ /* Without proc_mm the range is clamped to end at CONFIG_STUB_START */
+ if(!proc_mm && (end_addr > CONFIG_STUB_START))
+ end_addr = CONFIG_STUB_START;
- fix_range_common(mm, start_addr, end_addr, force, fd, do_ops);
+ fix_range_common(mm, start_addr, end_addr, force, do_ops);
}
void __flush_tlb_one_skas(unsigned long addr)
@@ -69,17 +79,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start,
+/*
+ * Flush all of mm: fix_range() is run from address 0 up to task_size when
+ * proc_mm is set, or up to CONFIG_STUB_START otherwise.
+ */
void flush_tlb_mm_skas(struct mm_struct *mm)
{
+ unsigned long end;
+
/* Don't bother flushing if this address space is about to be
* destroyed.
*/
if(atomic_read(&mm->mm_users) == 0)
return;
- fix_range(mm, 0, host_task_size, 0);
- flush_tlb_kernel_range_common(start_vm, end_vm);
+ end = proc_mm ? task_size : CONFIG_STUB_START;
+ fix_range(mm, 0, end, 0);
}
+/*
+ * Run fix_range() over the current task's mm with force set, from address 0
+ * up to task_size when proc_mm is set, or up to CONFIG_STUB_START otherwise.
+ */
void force_flush_all_skas(void)
{
- fix_range(current->mm, 0, host_task_size, 1);
+ unsigned long end = proc_mm ? task_size : CONFIG_STUB_START;
+ fix_range(current->mm, 0, end, 1);
}
diff --git a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c
index 0dee1d95c806..9950a6716fe5 100644
--- a/arch/um/kernel/skas/trap_user.c
+++ b/arch/um/kernel/skas/trap_user.c
@@ -58,7 +58,6 @@ void user_signal(int sig, union uml_pt_regs *regs, int pid)
int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
(sig == SIGILL) || (sig == SIGTRAP));
- regs->skas.is_user = 1;
if (segv)
get_skas_faultinfo(pid, &regs->skas.faultinfo);
info = &sig_info[sig];
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
new file mode 100644
index 000000000000..1429c131879d
--- /dev/null
+++ b/arch/um/kernel/syscall.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "kern_util.h"
+#include "syscall.h"
+#include "os.h"
+
+/*
+ * Table of recorded system calls.  A slot is filled in by
+ * record_syscall_start() and completed by record_syscall_end().
+ */
+struct {
+ int syscall;
+ int pid;
+ /* Holds 0xdeadbeef from the start of the call until its end is recorded */
+ long result;
+ /* os_usecs() values taken when the call started and ended */
+ unsigned long long start;
+ unsigned long long end;
+} syscall_record[1024];
+
+/*
+ * Claim a slot in syscall_record for a system call that is about to run and
+ * fill in its number, the current pid, a placeholder result and the start
+ * time.  Returns the slot index, to be handed to record_syscall_end().
+ */
+int record_syscall_start(int syscall)
+{
+	int capacity = sizeof(syscall_record)/sizeof(syscall_record[0]);
+	int slot = next_syscall_index(capacity);
+
+	syscall_record[slot].syscall = syscall;
+	syscall_record[slot].pid = current_pid();
+	/* Placeholder until record_syscall_end() stores the real result */
+	syscall_record[slot].result = 0xdeadbeef;
+	syscall_record[slot].start = os_usecs();
+
+	return(slot);
+}
+
+/*
+ * Complete the syscall_record slot index with the call's result and the
+ * current os_usecs() time.  index is used as given, without a range check.
+ */
+void record_syscall_end(int index, long result)
+{
+ syscall_record[index].result = result;
+ syscall_record[index].end = os_usecs();
+}
diff --git a/arch/um/kernel/syscall_user.c b/arch/um/kernel/syscall_user.c
deleted file mode 100644
index 01b711e00a85..000000000000
--- a/arch/um/kernel/syscall_user.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <sys/time.h>
-#include "kern_util.h"
-#include "syscall_user.h"
-
-struct {
- int syscall;
- int pid;
- long result;
- struct timeval start;
- struct timeval end;
-} syscall_record[1024];
-
-int record_syscall_start(int syscall)
-{
- int max, index;
-
- max = sizeof(syscall_record)/sizeof(syscall_record[0]);
- index = next_syscall_index(max);
-
- syscall_record[index].syscall = syscall;
- syscall_record[index].pid = current_pid();
- syscall_record[index].result = 0xdeadbeef;
- gettimeofday(&syscall_record[index].start, NULL);
- return(index);
-}
-
-void record_syscall_end(int index, long result)
-{
- syscall_record[index].result = result;
- gettimeofday(&syscall_record[index].end, NULL);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index f829b309b63c..c40b611e3d93 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -48,6 +48,13 @@ void enable_timer(void)
set_interval(ITIMER_VIRTUAL);
}
+/*
+ * Store in the struct itimerval that ptr points to a timer whose interval
+ * and initial value are both one tick, i.e. 1000000/hz() microseconds.
+ */
+void prepare_timer(void * ptr)
+{
+	struct itimerval *timer = ptr;
+	int tick_usec = 1000000/hz();
+
+	*timer = ((struct itimerval) {
+			.it_interval = { .tv_sec = 0, .tv_usec = tick_usec },
+			.it_value = { .tv_sec = 0, .tv_usec = tick_usec } });
+}
+
void disable_timer(void)
{
struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c
index a8b4ef601f59..4e08f7545d63 100644
--- a/arch/um/kernel/time_kern.c
+++ b/arch/um/kernel/time_kern.c
@@ -137,7 +137,10 @@ long um_stime(int __user *tptr)
void timer_handler(int sig, union uml_pt_regs *regs)
{
local_irq_disable();
- update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)), (regs)->skas.is_user));
+ irq_enter();
+ update_process_times(CHOOSE_MODE(user_context(UPT_SP(regs)),
+ (regs)->skas.is_user));
+ irq_exit();
local_irq_enable();
if(current_thread->cpu == 0)
timer_irq(regs);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index eda477edfdf5..80ed6188e8a2 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -15,33 +15,144 @@
#include "mem_user.h"
#include "os.h"
+/*
+ * Queue an MMAP of len bytes at virt, backed by physical address phys, with
+ * permissions r/w/x.
+ *
+ * ops is the queue, *index the slot of the most recent entry (-1 when the
+ * queue is empty) and last_filled the highest usable slot.  If the new
+ * mapping directly continues the most recent entry (same kind, permissions
+ * and fd, contiguous address and offset) that entry is simply extended.
+ * Otherwise, a full queue is first flushed through do_ops and the request is
+ * stored in the next slot.
+ *
+ * Returns 0, or the do_ops result if a flush took place; the new entry is
+ * queued even when that flush reported an error.
+ */
+static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
+ int r, int w, int x, struct host_vm_op *ops, int *index,
+ int last_filled, union mm_context *mmu, void **flush,
+ int (*do_ops)(union mm_context *, struct host_vm_op *,
+ int, int, void **))
+{
+ __u64 offset;
+ struct host_vm_op *last;
+ int fd, ret = 0;
+
+ fd = phys_mapping(phys, &offset);
+ if(*index != -1){
+ last = &ops[*index];
+ if((last->type == MMAP) &&
+ (last->u.mmap.addr + last->u.mmap.len == virt) &&
+ (last->u.mmap.r == r) && (last->u.mmap.w == w) &&
+ (last->u.mmap.x == x) && (last->u.mmap.fd == fd) &&
+ (last->u.mmap.offset + last->u.mmap.len == offset)){
+ last->u.mmap.len += len;
+ return 0;
+ }
+ }
+
+ /* Queue full: hand everything to do_ops and start again from empty */
+ if(*index == last_filled){
+ ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
+ *index = -1;
+ }
+
+ ops[++*index] = ((struct host_vm_op) { .type = MMAP,
+ .u = { .mmap = {
+ .addr = virt,
+ .len = len,
+ .r = r,
+ .w = w,
+ .x = x,
+ .fd = fd,
+ .offset = offset }
+ } });
+ return ret;
+}
+
+/*
+ * Queue a MUNMAP of len bytes at addr.
+ *
+ * ops is the queue, *index the slot of the most recent entry (-1 when the
+ * queue is empty) and last_filled the highest usable slot.  If the most
+ * recent entry is a MUNMAP that ends exactly at addr, it is extended by len.
+ * Otherwise, a full queue is first flushed through do_ops and the request is
+ * stored in the next slot.
+ *
+ * Returns 0, or the do_ops result if a flush took place; the new entry is
+ * queued even when that flush reported an error.
+ */
+static int add_munmap(unsigned long addr, unsigned long len,
+		      struct host_vm_op *ops, int *index, int last_filled,
+		      union mm_context *mmu, void **flush,
+		      int (*do_ops)(union mm_context *, struct host_vm_op *,
+				    int, int, void **))
+{
+	struct host_vm_op *last;
+	int ret = 0;
+
+	if(*index != -1){
+		last = &ops[*index];
+		/* The entry is known to be a MUNMAP here, so its length must
+		 * be read through the munmap member of the union.
+		 */
+		if((last->type == MUNMAP) &&
+		   (last->u.munmap.addr + last->u.munmap.len == addr)){
+			last->u.munmap.len += len;
+			return 0;
+		}
+	}
+
+	/* Queue full: hand everything to do_ops and start again from empty */
+	if(*index == last_filled){
+		ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
+		*index = -1;
+	}
+
+	ops[++*index] = ((struct host_vm_op) { .type = MUNMAP,
+					       .u = { .munmap = {
+						       .addr = addr,
+						       .len = len } } });
+	return ret;
+}
+
+/*
+ * Queue an MPROTECT of len bytes at addr with permissions r/w/x.
+ *
+ * ops is the queue, *index the slot of the most recent entry (-1 when the
+ * queue is empty) and last_filled the highest usable slot.  If the most
+ * recent entry is an MPROTECT with the same permissions that ends exactly at
+ * addr, it is extended by len.  Otherwise, a full queue is first flushed
+ * through do_ops and the request is stored in the next slot.
+ *
+ * Returns 0, or the do_ops result if a flush took place; the new entry is
+ * queued even when that flush reported an error.
+ */
+static int add_mprotect(unsigned long addr, unsigned long len, int r, int w,
+ int x, struct host_vm_op *ops, int *index,
+ int last_filled, union mm_context *mmu, void **flush,
+ int (*do_ops)(union mm_context *, struct host_vm_op *,
+ int, int, void **))
+{
+ struct host_vm_op *last;
+ int ret = 0;
+
+ if(*index != -1){
+ last = &ops[*index];
+ if((last->type == MPROTECT) &&
+ (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
+ (last->u.mprotect.r == r) && (last->u.mprotect.w == w) &&
+ (last->u.mprotect.x == x)){
+ last->u.mprotect.len += len;
+ return 0;
+ }
+ }
+
+ /* Queue full: hand everything to do_ops and start again from empty */
+ if(*index == last_filled){
+ ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
+ *index = -1;
+ }
+
+ ops[++*index] = ((struct host_vm_op) { .type = MPROTECT,
+ .u = { .mprotect = {
+ .addr = addr,
+ .len = len,
+ .r = r,
+ .w = w,
+ .x = x } } });
+ return ret;
+}
+
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
- unsigned long end_addr, int force, int data,
- void (*do_ops)(int, struct host_vm_op *, int))
+ unsigned long end_addr, int force,
+ int (*do_ops)(union mm_context *, struct host_vm_op *,
+ int, int, void **))
{
pgd_t *npgd;
pud_t *npud;
pmd_t *npmd;
pte_t *npte;
+ union mm_context *mmu = &mm->context;
unsigned long addr, end;
int r, w, x;
- struct host_vm_op ops[16];
+ struct host_vm_op ops[1];
+ void *flush = NULL;
int op_index = -1, last_op = sizeof(ops) / sizeof(ops[0]) - 1;
+ int ret = 0;
if(mm == NULL) return;
- for(addr = start_addr; addr < end_addr;){
+ ops[0].type = NONE;
+ for(addr = start_addr; addr < end_addr && !ret;){
npgd = pgd_offset(mm, addr);
if(!pgd_present(*npgd)){
end = ADD_ROUND(addr, PGDIR_SIZE);
if(end > end_addr)
end = end_addr;
if(force || pgd_newpage(*npgd)){
- op_index = add_munmap(addr, end - addr, ops,
- op_index, last_op, data,
- do_ops);
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
pgd_mkuptodate(*npgd);
}
addr = end;
@@ -54,9 +165,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
if(end > end_addr)
end = end_addr;
if(force || pud_newpage(*npud)){
- op_index = add_munmap(addr, end - addr, ops,
- op_index, last_op, data,
- do_ops);
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
pud_mkuptodate(*npud);
}
addr = end;
@@ -69,9 +180,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
if(end > end_addr)
end = end_addr;
if(force || pmd_newpage(*npmd)){
- op_index = add_munmap(addr, end - addr, ops,
- op_index, last_op, data,
- do_ops);
+ ret = add_munmap(addr, end - addr, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
pmd_mkuptodate(*npmd);
}
addr = end;
@@ -90,24 +201,32 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
}
if(force || pte_newpage(*npte)){
if(pte_present(*npte))
- op_index = add_mmap(addr,
- pte_val(*npte) & PAGE_MASK,
- PAGE_SIZE, r, w, x, ops,
- op_index, last_op, data,
- do_ops);
- else op_index = add_munmap(addr, PAGE_SIZE, ops,
- op_index, last_op, data,
- do_ops);
+ ret = add_mmap(addr,
+ pte_val(*npte) & PAGE_MASK,
+ PAGE_SIZE, r, w, x, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
+ else ret = add_munmap(addr, PAGE_SIZE, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
}
else if(pte_newprot(*npte))
- op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops,
- op_index, last_op, data,
- do_ops);
+ ret = add_mprotect(addr, PAGE_SIZE, r, w, x, ops,
+ &op_index, last_op, mmu,
+ &flush, do_ops);
*npte = pte_mkuptodate(*npte);
addr += PAGE_SIZE;
}
- (*do_ops)(data, ops, op_index);
+
+ if(!ret)
+ ret = (*do_ops)(mmu, ops, op_index, 1, &flush);
+
+ /* This is not an else because ret is modified above */
+ if(ret) {
+ printk("fix_range_common: failed, killing current process\n");
+ force_sig(SIGKILL, current);
+ }
}
int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
@@ -195,51 +314,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
return(updated);
}
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
-{
- address &= PAGE_MASK;
- flush_tlb_range(vma, address, address + PAGE_SIZE);
-}
-
-void flush_tlb_all(void)
-{
- flush_tlb_mm(current->mm);
-}
-
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
- CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
- flush_tlb_kernel_range_common, start, end);
-}
-
-void flush_tlb_kernel_vm(void)
-{
- CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
- flush_tlb_kernel_range_common(start_vm, end_vm));
-}
-
-void __flush_tlb_one(unsigned long addr)
-{
- CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
- end);
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
- CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
-}
-
-void force_flush_all(void)
-{
- CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
-}
-
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
return(pgd_offset(mm, address));
@@ -269,101 +343,48 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr)
return(pte_offset_map(pmd, addr));
}
-int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
- int r, int w, int x, struct host_vm_op *ops, int index,
- int last_filled, int data,
- void (*do_ops)(int, struct host_vm_op *, int))
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
- __u64 offset;
- struct host_vm_op *last;
- int fd;
-
- fd = phys_mapping(phys, &offset);
- if(index != -1){
- last = &ops[index];
- if((last->type == MMAP) &&
- (last->u.mmap.addr + last->u.mmap.len == virt) &&
- (last->u.mmap.r == r) && (last->u.mmap.w == w) &&
- (last->u.mmap.x == x) && (last->u.mmap.fd == fd) &&
- (last->u.mmap.offset + last->u.mmap.len == offset)){
- last->u.mmap.len += len;
- return(index);
- }
- }
-
- if(index == last_filled){
- (*do_ops)(data, ops, last_filled);
- index = -1;
- }
-
- ops[++index] = ((struct host_vm_op) { .type = MMAP,
- .u = { .mmap = {
- .addr = virt,
- .len = len,
- .r = r,
- .w = w,
- .x = x,
- .fd = fd,
- .offset = offset }
- } });
- return(index);
+ address &= PAGE_MASK;
+ flush_tlb_range(vma, address, address + PAGE_SIZE);
}
-int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops,
- int index, int last_filled, int data,
- void (*do_ops)(int, struct host_vm_op *, int))
+void flush_tlb_all(void)
{
- struct host_vm_op *last;
-
- if(index != -1){
- last = &ops[index];
- if((last->type == MUNMAP) &&
- (last->u.munmap.addr + last->u.mmap.len == addr)){
- last->u.munmap.len += len;
- return(index);
- }
- }
+ flush_tlb_mm(current->mm);
+}
- if(index == last_filled){
- (*do_ops)(data, ops, last_filled);
- index = -1;
- }
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
+ flush_tlb_kernel_range_common, start, end);
+}
- ops[++index] = ((struct host_vm_op) { .type = MUNMAP,
- .u = { .munmap = {
- .addr = addr,
- .len = len } } });
- return(index);
+void flush_tlb_kernel_vm(void)
+{
+ CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
+ flush_tlb_kernel_range_common(start_vm, end_vm));
}
-int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x,
- struct host_vm_op *ops, int index, int last_filled, int data,
- void (*do_ops)(int, struct host_vm_op *, int))
+void __flush_tlb_one(unsigned long addr)
{
- struct host_vm_op *last;
+ CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
+}
- if(index != -1){
- last = &ops[index];
- if((last->type == MPROTECT) &&
- (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
- (last->u.mprotect.r == r) && (last->u.mprotect.w == w) &&
- (last->u.mprotect.x == x)){
- last->u.mprotect.len += len;
- return(index);
- }
- }
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
+ end);
+}
- if(index == last_filled){
- (*do_ops)(data, ops, last_filled);
- index = -1;
- }
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
+}
- ops[++index] = ((struct host_vm_op) { .type = MPROTECT,
- .u = { .mprotect = {
- .addr = addr,
- .len = len,
- .r = r,
- .w = w,
- .x = x } } });
- return(index);
+void force_flush_all(void)
+{
+ CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
}
+
diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
index c20aef120598..87cc6fd76ced 100644
--- a/arch/um/kernel/trap_kern.c
+++ b/arch/um/kernel/trap_kern.c
@@ -26,6 +26,7 @@
#include "mem.h"
#include "mem_kern.h"
+/* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by segv(). */
int handle_page_fault(unsigned long address, unsigned long ip,
int is_write, int is_user, int *code_out)
{
@@ -35,7 +36,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long page;
int err = -EFAULT;
*code_out = SEGV_MAPERR;
@@ -52,17 +52,17 @@ int handle_page_fault(unsigned long address, unsigned long ip,
else if(expand_stack(vma, address))
goto out;
- good_area:
+good_area:
*code_out = SEGV_ACCERR;
if(is_write && !(vma->vm_flags & VM_WRITE))
goto out;
- if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ /* Don't require VM_READ|VM_EXEC for write faults! */
+ if(!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out;
- page = address & PAGE_MASK;
do {
- survive:
+survive:
switch (handle_mm_fault(mm, vma, address, is_write)){
case VM_FAULT_MINOR:
current->min_flt++;
@@ -79,16 +79,15 @@ int handle_page_fault(unsigned long address, unsigned long ip,
default:
BUG();
}
- pgd = pgd_offset(mm, page);
- pud = pud_offset(pgd, page);
- pmd = pmd_offset(pud, page);
- pte = pte_offset_kernel(pmd, page);
+ pgd = pgd_offset(mm, address);
+ pud = pud_offset(pgd, address);
+ pmd = pmd_offset(pud, address);
+ pte = pte_offset_kernel(pmd, address);
} while(!pte_present(*pte));
err = 0;
- *pte = pte_mkyoung(*pte);
- if(pte_write(*pte)) *pte = pte_mkdirty(*pte);
- flush_tlb_page(vma, page);
- out:
+ WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
+ flush_tlb_page(vma, address);
+out:
up_read(&mm->mmap_sem);
return(err);
@@ -144,19 +143,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc)
panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
address, ip);
- if(err == -EACCES){
+ if (err == -EACCES) {
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_code = BUS_ADRERR;
si.si_addr = (void *)address;
current->thread.arch.faultinfo = fi;
force_sig_info(SIGBUS, &si, current);
- }
- else if(err == -ENOMEM){
+ } else if (err == -ENOMEM) {
printk("VM: killing process %s\n", current->comm);
do_exit(SIGKILL);
- }
- else {
+ } else {
+ BUG_ON(err != -EFAULT);
si.si_signo = SIGSEGV;
si.si_addr = (void *) address;
current->thread.arch.faultinfo = fi;
@@ -200,30 +198,3 @@ void winch(int sig, union uml_pt_regs *regs)
void trap_init(void)
{
}
-
-DEFINE_SPINLOCK(trap_lock);
-
-static int trap_index = 0;
-
-int next_trap_index(int limit)
-{
- int ret;
-
- spin_lock(&trap_lock);
- ret = trap_index;
- if(++trap_index == limit)
- trap_index = 0;
- spin_unlock(&trap_lock);
- return(ret);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
index f825a6eda3f5..e9ccd6b8d3c7 100644
--- a/arch/um/kernel/trap_user.c
+++ b/arch/um/kernel/trap_user.c
@@ -40,35 +40,14 @@ void kill_child_dead(int pid)
} while(1);
}
-/* Unlocked - don't care if this is a bit off */
-int nsegfaults = 0;
-
-struct {
- unsigned long address;
- int is_write;
- int pid;
- unsigned long sp;
- int is_user;
-} segfault_record[1024];
-
void segv_handler(int sig, union uml_pt_regs *regs)
{
- int index, max;
struct faultinfo * fi = UPT_FAULTINFO(regs);
if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
bad_segv(*fi, UPT_IP(regs));
return;
}
- max = sizeof(segfault_record)/sizeof(segfault_record[0]);
- index = next_trap_index(max);
-
- nsegfaults++;
- segfault_record[index].address = FAULT_ADDRESS(*fi);
- segfault_record[index].pid = os_getpid();
- segfault_record[index].is_write = FAULT_WRITE(*fi);
- segfault_record[index].sp = UPT_SP(regs);
- segfault_record[index].is_user = UPT_IS_USER(regs);
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
}
diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h
index 3fbb5fe26f49..aa6db384af80 100644
--- a/arch/um/kernel/tt/include/uaccess-tt.h
+++ b/arch/um/kernel/tt/include/uaccess-tt.h
@@ -33,7 +33,7 @@ extern unsigned long uml_physmem;
(((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \
(under_task_size(addr, size) || is_stack(addr, size))))
-static inline int verify_area_tt(int type, const void * addr,
+static inline int verify_area_tt(int type, const void __user * addr,
unsigned long size)
{
return(access_ok_tt(type, addr, size) ? 0 : -EFAULT);
@@ -50,12 +50,12 @@ extern int __do_clear_user(void *mem, size_t len, void **fault_addr,
extern int __do_strnlen_user(const char *str, unsigned long n,
void **fault_addr, void **fault_catcher);
-extern int copy_from_user_tt(void *to, const void *from, int n);
-extern int copy_to_user_tt(void *to, const void *from, int n);
-extern int strncpy_from_user_tt(char *dst, const char *src, int count);
-extern int __clear_user_tt(void *mem, int len);
-extern int clear_user_tt(void *mem, int len);
-extern int strnlen_user_tt(const void *str, int len);
+extern int copy_from_user_tt(void *to, const void __user *from, int n);
+extern int copy_to_user_tt(void __user *to, const void *from, int n);
+extern int strncpy_from_user_tt(char *dst, const char __user *src, int count);
+extern int __clear_user_tt(void __user *mem, int len);
+extern int clear_user_tt(void __user *mem, int len);
+extern int strnlen_user_tt(const void __user *str, int len);
#endif
diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c
index 2650a628719e..3d29c90514cc 100644
--- a/arch/um/kernel/tt/syscall_kern.c
+++ b/arch/um/kernel/tt/syscall_kern.c
@@ -12,36 +12,41 @@
#include "asm/uaccess.h"
#include "asm/stat.h"
#include "sysdep/syscalls.h"
+#include "sysdep/sigcontext.h"
#include "kern_util.h"
+#include "syscall.h"
-extern syscall_handler_t *sys_call_table[];
-
-long execute_syscall_tt(void *r)
+void syscall_handler_tt(int sig, struct pt_regs *regs)
{
- struct pt_regs *regs = r;
- long res;
+ void *sc;
+ long result;
int syscall;
-
#ifdef CONFIG_SYSCALL_DEBUG
+ int index;
+ index = record_syscall_start(syscall);
+#endif
+ sc = UPT_SC(&regs->regs);
+ SC_START_SYSCALL(sc);
+
+ syscall_trace(&regs->regs, 0);
+
current->thread.nsyscalls++;
nsyscalls++;
-#endif
syscall = UPT_SYSCALL_NR(&regs->regs);
if((syscall >= NR_syscalls) || (syscall < 0))
- res = -ENOSYS;
- else res = EXECUTE_SYSCALL(syscall, regs);
+ result = -ENOSYS;
+ else result = EXECUTE_SYSCALL(syscall, regs);
- return(res);
-}
+ /* regs->sc may have changed while the system call ran (there may
+ * have been an interrupt or segfault), so it needs to be refreshed.
+ */
+ UPT_SC(&regs->regs) = sc;
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+ SC_SET_SYSCALL_RETURN(sc, result);
+
+ syscall_trace(&regs->regs, 1);
+#ifdef CONFIG_SYSCALL_DEBUG
+ record_syscall_end(index, result);
+#endif
+}
diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c
index b218316cfdb2..902987bf379b 100644
--- a/arch/um/kernel/tt/syscall_user.c
+++ b/arch/um/kernel/tt/syscall_user.c
@@ -13,42 +13,9 @@
#include "task.h"
#include "user_util.h"
#include "kern_util.h"
-#include "syscall_user.h"
+#include "syscall.h"
#include "tt.h"
-
-void syscall_handler_tt(int sig, union uml_pt_regs *regs)
-{
- void *sc;
- long result;
- int syscall;
-#ifdef UML_CONFIG_DEBUG_SYSCALL
- int index;
-#endif
-
- syscall = UPT_SYSCALL_NR(regs);
- sc = UPT_SC(regs);
- SC_START_SYSCALL(sc);
-
-#ifdef UML_CONFIG_DEBUG_SYSCALL
- index = record_syscall_start(syscall);
-#endif
- syscall_trace(regs, 0);
- result = execute_syscall_tt(regs);
-
- /* regs->sc may have changed while the system call ran (there may
- * have been an interrupt or segfault), so it needs to be refreshed.
- */
- UPT_SC(regs) = sc;
-
- SC_SET_SYSCALL_RETURN(sc, result);
-
- syscall_trace(regs, 1);
-#ifdef UML_CONFIG_DEBUG_SYSCALL
- record_syscall_end(index, result);
-#endif
-}
-
void do_sigtrap(void *task)
{
UPT_SYSCALL_NR(TASK_REGS(task)) = -1;
diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
index 203216ad86f1..f1d85dbb45b9 100644
--- a/arch/um/kernel/tt/tlb.c
+++ b/arch/um/kernel/tt/tlb.c
@@ -17,25 +17,31 @@
#include "os.h"
#include "tlb.h"
-static void do_ops(int unused, struct host_vm_op *ops, int last)
+static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
+ int finished, void **flush)
{
struct host_vm_op *op;
- int i;
+ int i, ret=0;
- for(i = 0; i <= last; i++){
+ for(i = 0; i <= last && !ret; i++){
op = &ops[i];
switch(op->type){
case MMAP:
- os_map_memory((void *) op->u.mmap.addr, op->u.mmap.fd,
- op->u.mmap.offset, op->u.mmap.len,
- op->u.mmap.r, op->u.mmap.w,
- op->u.mmap.x);
+ ret = os_map_memory((void *) op->u.mmap.addr,
+ op->u.mmap.fd, op->u.mmap.offset,
+ op->u.mmap.len, op->u.mmap.r,
+ op->u.mmap.w, op->u.mmap.x);
break;
case MUNMAP:
- os_unmap_memory((void *) op->u.munmap.addr,
- op->u.munmap.len);
+ ret = os_unmap_memory((void *) op->u.munmap.addr,
+ op->u.munmap.len);
break;
case MPROTECT:
+ ret = protect_memory(op->u.mprotect.addr,
+ op->u.munmap.len,
+ op->u.mprotect.r,
+ op->u.mprotect.w,
+ op->u.mprotect.x, 1);
protect_memory(op->u.mprotect.addr, op->u.munmap.len,
op->u.mprotect.r, op->u.mprotect.w,
op->u.mprotect.x, 1);
@@ -45,6 +51,8 @@ static void do_ops(int unused, struct host_vm_op *ops, int last)
break;
}
}
+
+ return ret;
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
@@ -55,7 +63,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr,
panic("fix_range fixing wrong address space, current = 0x%p",
current);
- fix_range_common(mm, start_addr, end_addr, force, 0, do_ops);
+ fix_range_common(mm, start_addr, end_addr, force, do_ops);
}
atomic_t vmchange_seq = ATOMIC_INIT(1);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8736d098f0ee..09f6f7ce4695 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -38,6 +38,9 @@
#include "choose-mode.h"
#include "mode_kern.h"
#include "mode.h"
+#ifdef UML_CONFIG_MODE_SKAS
+#include "skas.h"
+#endif
#define DEFAULT_COMMAND_LINE "root=98:0"
@@ -123,7 +126,7 @@ unsigned long start_vm;
unsigned long end_vm;
int ncpus = 1;
-#ifdef CONFIG_MODE_TT
+#ifdef CONFIG_CMDLINE_ON_HOST
/* Pointer set in linux_main, the array itself is private to each thread,
* and changed at address space creation time so this poses no concurrency
* problems.
@@ -138,7 +141,7 @@ long physmem_size = 32 * 1024 * 1024;
void set_cmdline(char *cmd)
{
-#ifdef CONFIG_MODE_TT
+#ifdef CONFIG_CMDLINE_ON_HOST
char *umid, *ptr;
if(CHOOSE_MODE(honeypot, 0)) return;
@@ -318,6 +321,7 @@ int linux_main(int argc, char **argv)
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
unsigned int i, add;
+ char * mode;
for (i = 1; i < argc; i++){
if((i == 1) && (argv[i][0] == ' ')) continue;
@@ -329,6 +333,7 @@ int linux_main(int argc, char **argv)
if(have_root == 0)
add_arg(DEFAULT_COMMAND_LINE);
+ os_early_checks();
mode_tt = force_tt ? 1 : !can_do_skas();
#ifndef CONFIG_MODE_TT
if (mode_tt) {
@@ -338,6 +343,21 @@ int linux_main(int argc, char **argv)
exit(1);
}
#endif
+
+#ifndef CONFIG_MODE_SKAS
+ mode = "TT";
+#else
+ /* Show to the user the result of selection */
+ if (mode_tt)
+ mode = "TT";
+ else if (proc_mm && ptrace_faultinfo)
+ mode = "SKAS3";
+ else
+ mode = "SKAS0";
+#endif
+
+ printf("UML running in %s mode\n", mode);
+
uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0,
&host_task_size, &task_size);
@@ -366,7 +386,7 @@ int linux_main(int argc, char **argv)
setup_machinename(system_utsname.machine);
-#ifdef CONFIG_MODE_TT
+#ifdef CONFIG_CMDLINE_ON_HOST
argv1_begin = argv[1];
argv1_end = &argv[1][strlen(argv[1])];
#endif
@@ -451,7 +471,6 @@ void __init setup_arch(char **cmdline_p)
void __init check_bugs(void)
{
arch_check_bugs();
- check_ptrace();
check_sigio();
check_devanon();
}
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 61dfd4fef752..af11915ce0a8 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -16,8 +16,8 @@ SECTIONS
__binary_start = .;
#ifdef MODE_TT
- .remap_data : { arch/um/sys-SUBARCH/unmap_fin.o (.data .bss) }
- .remap : { arch/um/sys-SUBARCH/unmap_fin.o (.text) }
+ .remap_data : { UNMAP_PATH (.data .bss) }
+ .remap : { UNMAP_PATH (.text) }
. = ALIGN(4096); /* Init code and data */
#endif
@@ -30,6 +30,7 @@ SECTIONS
_einittext = .;
}
. = ALIGN(4096);
+
.text :
{
*(.text)
@@ -39,6 +40,12 @@ SECTIONS
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
*(.gnu.linkonce.t*)
+
+ . = ALIGN(4096);
+ __syscall_stub_start = .;
+ *(.__syscall_stub*)
+ __syscall_stub_end = .;
+ . = ALIGN(4096);
}
#include "asm/common.lds.S"
@@ -86,14 +93,10 @@ SECTIONS
*(.bss)
*(COMMON)
}
- _end = . ;
+ _end = .;
PROVIDE (end = .);
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
+
+ STABS_DEBUG
+
+ DWARF_DEBUG
}
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 4ddf540284ce..7a1662419c0c 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -3,11 +3,19 @@
# Licensed under the GPL
#
-obj-y = elf_aux.o file.o process.o signal.o time.o tty.o user_syms.o drivers/ \
- sys-$(SUBARCH)/
+obj-y = aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \
+ tty.o user_syms.o drivers/ sys-$(SUBARCH)/
-USER_OBJS := elf_aux.o file.o process.o signal.o time.o tty.o
+USER_OBJS := aio.o elf_aux.o file.o process.o signal.o start_up.o time.o tt.o \
+ tty.o
+
+elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
+CFLAGS_elf_aux.o += -I$(objtree)/arch/um
CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
+HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \
+ echo -DHAVE_AIO_ABI )
+CFLAGS_aio.o += $(HAVE_AIO_ABI)
+
include arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
new file mode 100644
index 000000000000..b04897cd995d
--- /dev/null
+++ b/arch/um/os-Linux/aio.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include "os.h"
+#include "helper.h"
+#include "aio.h"
+#include "init.h"
+#include "user.h"
+#include "mode.h"
+
+static int aio_req_fd_r = -1;
+static int aio_req_fd_w = -1;
+
+static int update_aio(struct aio_context *aio, int res)
+{
+ if(res < 0)
+ aio->len = res;
+ else if((res == 0) && (aio->type == AIO_READ)){
+ /* This is the EOF case - we have hit the end of the file
+ * and it ends in a partial block, so we fill the end of
+ * the block with zeros and claim success.
+ */
+ memset(aio->data, 0, aio->len);
+ aio->len = 0;
+ }
+ else if(res > 0){
+ aio->len -= res;
+ aio->data += res;
+ aio->offset += res;
+ return aio->len;
+ }
+
+ return 0;
+}
+
+#if defined(HAVE_AIO_ABI)
+#include <linux/aio_abi.h>
+
+/* If we have the headers, we are going to build with AIO enabled.
+ * If we don't have aio in libc, we define the necessary stubs here.
+ */
+
+#if !defined(HAVE_AIO_LIBC)
+
+static long io_setup(int n, aio_context_t *ctxp)
+{
+ return syscall(__NR_io_setup, n, ctxp);
+}
+
+static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
+{
+ return syscall(__NR_io_submit, ctx, nr, iocbpp);
+}
+
+static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
+ struct io_event *events, struct timespec *timeout)
+{
+ return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
+}
+
+#endif
+
+/* The AIO_MMAP cases force the mmapped page into memory here
+ * rather than in whatever place first touches the data. I used
+ * to do this by touching the page, but that's delicate because
+ * gcc is prone to optimizing that away. So, what's done here
+ * is we read from the descriptor from which the page was
+ * mapped. The caller is required to pass an offset which is
+ * inside the page that was mapped. Thus, when the read
+ * returns, we know that the page is in the page cache, and
+ * that it now backs the mmapped area.
+ */
+
+static int do_aio(aio_context_t ctx, struct aio_context *aio)
+{
+ struct iocb iocb, *iocbp = &iocb;
+ char c;
+ int err;
+
+ iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
+ .aio_reqprio = 0,
+ .aio_fildes = aio->fd,
+ .aio_buf = (unsigned long) aio->data,
+ .aio_nbytes = aio->len,
+ .aio_offset = aio->offset,
+ .aio_reserved1 = 0,
+ .aio_reserved2 = 0,
+ .aio_reserved3 = 0 });
+
+ switch(aio->type){
+ case AIO_READ:
+ iocb.aio_lio_opcode = IOCB_CMD_PREAD;
+ break;
+ case AIO_WRITE:
+ iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
+ break;
+ case AIO_MMAP:
+ iocb.aio_lio_opcode = IOCB_CMD_PREAD;
+ iocb.aio_buf = (unsigned long) &c;
+ iocb.aio_nbytes = sizeof(c);
+ break;
+ default:
+ printk("Bogus op in do_aio - %d\n", aio->type);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = io_submit(ctx, 1, &iocbp);
+ if(err > 0)
+ err = 0;
+
+ out:
+ return err;
+}
+
+static aio_context_t ctx = 0;
+
+static int aio_thread(void *arg)
+{
+ struct aio_thread_reply reply;
+ struct aio_context *aio;
+ struct io_event event;
+ int err, n;
+
+ signal(SIGWINCH, SIG_IGN);
+
+ while(1){
+ n = io_getevents(ctx, 1, 1, &event, NULL);
+ if(n < 0){
+ if(errno == EINTR)
+ continue;
+ printk("aio_thread - io_getevents failed, "
+ "errno = %d\n", errno);
+ }
+ else {
+ aio = (struct aio_context *) event.data;
+ if(update_aio(aio, event.res)){
+ do_aio(ctx, aio);
+ continue;
+ }
+
+ reply = ((struct aio_thread_reply)
+ { .data = aio,
+ .err = aio->len });
+ err = os_write_file(aio->reply_fd, &reply,
+ sizeof(reply));
+ if(err != sizeof(reply))
+ printk("aio_thread - write failed, "
+ "fd = %d, err = %d\n", aio->reply_fd,
+ -err);
+ }
+ }
+ return 0;
+}
+
+#endif
+
+static int do_not_aio(struct aio_context *aio)
+{
+ char c;
+ int err;
+
+ switch(aio->type){
+ case AIO_READ:
+ err = os_seek_file(aio->fd, aio->offset);
+ if(err)
+ goto out;
+
+ err = os_read_file(aio->fd, aio->data, aio->len);
+ break;
+ case AIO_WRITE:
+ err = os_seek_file(aio->fd, aio->offset);
+ if(err)
+ goto out;
+
+ err = os_write_file(aio->fd, aio->data, aio->len);
+ break;
+ case AIO_MMAP:
+ err = os_seek_file(aio->fd, aio->offset);
+ if(err)
+ goto out;
+
+ err = os_read_file(aio->fd, &c, sizeof(c));
+ break;
+ default:
+ printk("do_not_aio - bad request type : %d\n", aio->type);
+ err = -EINVAL;
+ break;
+ }
+
+ out:
+ return err;
+}
+
+/* Body of the 2.4-style I/O helper thread.  It reads aio_context pointers
+ * from aio_req_fd_r, performs each request synchronously with do_not_aio(),
+ * repeating for as long as update_aio() reports that part of the request
+ * is still outstanding, and then writes an aio_thread_reply to the
+ * request's reply_fd.  The loop never exits.
+ */
+static int not_aio_thread(void *arg)
+{
+	struct aio_context *aio;
+	struct aio_thread_reply reply;
+	int err;
+
+	signal(SIGWINCH, SIG_IGN);
+	while(1){
+		err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
+		if(err != sizeof(aio)){
+			if(err < 0)
+				printk("not_aio_thread - read failed, "
+				       "fd = %d, err = %d\n", aio_req_fd_r,
+				       -err);
+			else {
+				printk("not_aio_thread - short read, fd = %d, "
+				       "length = %d\n", aio_req_fd_r, err);
+			}
+			continue;
+		}
+ again:
+		err = do_not_aio(aio);
+
+		if(update_aio(aio, err))
+			goto again;
+
+		reply = ((struct aio_thread_reply) { .data = aio,
+						     .err = aio->len });
+		err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
+		/* Report the descriptor the write was attempted on, which
+		 * is the request's reply_fd, not the request pipe.
+		 */
+		if(err != sizeof(reply))
+			printk("not_aio_thread - write failed, fd = %d, "
+			       "err = %d\n", aio->reply_fd, -err);
+	}
+}
+
+static int submit_aio_24(struct aio_context *aio)
+{
+ int err;
+
+ err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
+ if(err == sizeof(aio))
+ err = 0;
+
+ return err;
+}
+
+static int aio_pid = -1;
+static int (*submit_proc)(struct aio_context *aio);
+
+static int init_aio_24(void)
+{
+ unsigned long stack;
+ int fds[2], err;
+
+ err = os_pipe(fds, 1, 1);
+ if(err)
+ goto out;
+
+ aio_req_fd_w = fds[0];
+ aio_req_fd_r = fds[1];
+ err = run_helper_thread(not_aio_thread, NULL,
+ CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
+ if(err < 0)
+ goto out_close_pipe;
+
+ aio_pid = err;
+ goto out;
+
+ out_close_pipe:
+ os_close_file(fds[0]);
+ os_close_file(fds[1]);
+ aio_req_fd_w = -1;
+ aio_req_fd_r = -1;
+ out:
+#ifndef HAVE_AIO_ABI
+ printk("/usr/include/linux/aio_abi.h not present during build\n");
+#endif
+ printk("2.6 host AIO support not used - falling back to I/O "
+ "thread\n");
+
+ submit_proc = submit_aio_24;
+
+ return 0;
+}
+
+#ifdef HAVE_AIO_ABI
+#define DEFAULT_24_AIO 0
+static int submit_aio_26(struct aio_context *aio)
+{
+ struct aio_thread_reply reply;
+ int err;
+
+ err = do_aio(ctx, aio);
+ if(err){
+ reply = ((struct aio_thread_reply) { .data = aio,
+ .err = err });
+ err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
+ if(err != sizeof(reply))
+ printk("submit_aio_26 - write failed, "
+ "fd = %d, err = %d\n", aio->reply_fd, -err);
+ else err = 0;
+ }
+
+ return err;
+}
+
+/* Set up 2.6 host AIO: create the AIO context with io_setup() and start
+ * aio_thread as a helper thread.  On success, records the helper's pid in
+ * aio_pid, installs submit_aio_26 as submit_proc and returns 0.  On
+ * failure, returns a negative error code.
+ */
+static int init_aio_26(void)
+{
+	unsigned long stack;
+	int err;
+
+	if(io_setup(256, &ctx)){
+		/* Capture errno before calling anything else, since a later
+		 * call could overwrite it.
+		 */
+		err = -errno;
+		printk("aio_thread failed to initialize context, err = %d\n",
+		       -err);
+		return err;
+	}
+
+	err = run_helper_thread(aio_thread, NULL,
+				CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
+	/* Propagate the helper's own failure code rather than whatever errno
+	 * happens to hold - presumably a negative errno value; confirm
+	 * against run_helper_thread().
+	 */
+	if(err < 0)
+		return err;
+
+	aio_pid = err;
+
+	printk("Using 2.6 host AIO\n");
+
+	submit_proc = submit_aio_26;
+
+	return 0;
+}
+
+#else
+#define DEFAULT_24_AIO 1
+static int submit_aio_26(struct aio_context *aio)
+{
+ return -ENOSYS;
+}
+
+static int init_aio_26(void)
+{
+ submit_proc = submit_aio_26;
+ return -ENOSYS;
+}
+#endif
+
+static int aio_24 = DEFAULT_24_AIO;
+
+static int __init set_aio_24(char *name, int *add)
+{
+ aio_24 = 1;
+ return 0;
+}
+
+__uml_setup("aio=2.4", set_aio_24,
+"aio=2.4\n"
+" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
+" available. 2.4 AIO is a single thread that handles one request at a\n"
+" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
+" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
+" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
+" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
+" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
+" your /usr/include/linux in order to build an AIO-capable UML\n\n"
+);
+
+/* Pick and initialize the AIO implementation.  In tt mode 2.6 AIO is
+ * disabled.  Otherwise, unless 2.4 AIO was requested, 2.6 AIO is tried
+ * first; if it reports -ENOSYS we fall back to the 2.4-style I/O thread,
+ * and any other result (including success) is returned as-is.
+ */
+static int init_aio(void)
+{
+	int err;
+
+	CHOOSE_MODE(({
+		if(!aio_24){
+			printk("Disabling 2.6 AIO in tt mode\n");
+			aio_24 = 1;
+		} }), (void) 0);
+
+	if(!aio_24){
+		err = init_aio_26();
+		/* Decide on the returned code, not on the global errno,
+		 * which init_aio_26 is not guaranteed to have set.
+		 */
+		if(err == -ENOSYS){
+			printk("2.6 AIO not supported on the host - "
+			       "reverting to 2.4 AIO\n");
+			aio_24 = 1;
+		}
+		else return err;
+	}
+
+	if(aio_24)
+		return init_aio_24();
+
+	return 0;
+}
+
+/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
+ * needs to be called when the kernel is running because it calls run_helper,
+ * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
+ * kernel does not run __exitcalls on shutdown, and can't because many of them
+ * break when called outside of module unloading.
+ */
+__initcall(init_aio);
+
+static void exit_aio(void)
+{
+ if(aio_pid != -1)
+ os_kill_process(aio_pid, 1);
+}
+
+__uml_exitcall(exit_aio);
+
+int submit_aio(struct aio_context *aio)
+{
+ return (*submit_proc)(aio);
+}
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index f0d6060e3e57..1399520a8588 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -11,8 +11,10 @@
#include <stddef.h>
#include "init.h"
#include "elf_user.h"
+#include "mem_user.h"
+#include <kernel-offsets.h>
-#if ELF_CLASS == ELFCLASS32
+#if HOST_ELF_CLASS == ELFCLASS32
typedef Elf32_auxv_t elf_auxv_t;
#else
typedef Elf64_auxv_t elf_auxv_t;
@@ -40,6 +42,9 @@ __init void scan_elf_aux( char **envp)
break;
case AT_SYSINFO_EHDR:
vsyscall_ehdr = auxv->a_un.a_val;
+ /* See if the page is under TASK_SIZE */
+ if (vsyscall_ehdr < (unsigned long) envp)
+ vsyscall_ehdr = 0;
break;
case AT_HWCAP:
elf_aux_hwcap = auxv->a_un.a_val;
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 1e126bfd31a7..d32413e4b4ce 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -3,10 +3,10 @@
* Licensed under the GPL
*/
-#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
+#include <setjmp.h>
#include <linux/unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
@@ -14,6 +14,10 @@
#include "os.h"
#include "user.h"
#include "user_util.h"
+#include "signal_user.h"
+#include "process.h"
+#include "irq_user.h"
+#include "kern_util.h"
#define ARBITRARY_ADDR -1
#define FAILURE_PID -1
@@ -114,8 +118,10 @@ void os_usr1_process(int pid)
kill(pid, SIGUSR1);
}
-/*Don't use the glibc version, which caches the result in TLS. It misses some
- * syscalls, and also breaks with clone(), which does not unshare the TLS.*/
+/* Don't use the glibc version, which caches the result in TLS. It misses some
+ * syscalls, and also breaks with clone(), which does not unshare the TLS.
+ */
+
inline _syscall0(pid_t, getpid)
int os_getpid(void)
@@ -164,6 +170,52 @@ int os_unmap_memory(void *addr, int len)
return(0);
}
+void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
+{
+ int flags = 0, pages;
+
+ if(sig_stack != NULL){
+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER);
+ set_sigstack(sig_stack, pages * page_size());
+ flags = SA_ONSTACK;
+ }
+ if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1);
+}
+
+void init_new_thread_signals(int altstack)
+{
+ int flags = altstack ? SA_ONSTACK : 0;
+
+ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGFPE, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGILL, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGBUS, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGUSR2, (__sighandler_t) sig_handler,
+ flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ signal(SIGHUP, SIG_IGN);
+
+ init_irq_signals(altstack);
+}
+
+int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr)
+{
+ sigjmp_buf buf;
+ int n;
+
+ *jmp_ptr = &buf;
+ n = sigsetjmp(buf, 1);
+ if(n != 0)
+ return(n);
+ (*fn)(arg);
+ return(0);
+}
+
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/kernel/process.c b/arch/um/os-Linux/start_up.c
index 1b5ef3e96c71..040cc1472bc7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
* Licensed under the GPL
*/
@@ -19,7 +19,6 @@
#include "user_util.h"
#include "kern_util.h"
#include "user.h"
-#include "process.h"
#include "signal_kern.h"
#include "signal_user.h"
#include "sysdep/ptrace.h"
@@ -32,105 +31,14 @@
#include "uml-config.h"
#include "choose-mode.h"
#include "mode.h"
+#include "tempfile.h"
#ifdef UML_CONFIG_MODE_SKAS
#include "skas.h"
#include "skas_ptrace.h"
#include "registers.h"
#endif
-void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
-{
- int flags = 0, pages;
-
- if(sig_stack != NULL){
- pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER);
- set_sigstack(sig_stack, pages * page_size());
- flags = SA_ONSTACK;
- }
- if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1);
-}
-
-void init_new_thread_signals(int altstack)
-{
- int flags = altstack ? SA_ONSTACK : 0;
-
- set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags,
- SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags,
- SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- set_handler(SIGFPE, (__sighandler_t) sig_handler, flags,
- SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- set_handler(SIGILL, (__sighandler_t) sig_handler, flags,
- SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- set_handler(SIGBUS, (__sighandler_t) sig_handler, flags,
- SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- set_handler(SIGUSR2, (__sighandler_t) sig_handler,
- flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
- signal(SIGHUP, SIG_IGN);
-
- init_irq_signals(altstack);
-}
-
-struct tramp {
- int (*tramp)(void *);
- void *tramp_data;
- unsigned long temp_stack;
- int flags;
- int pid;
-};
-
-/* See above for why sigkill is here */
-
-int sigkill = SIGKILL;
-
-int outer_tramp(void *arg)
-{
- struct tramp *t;
- int sig = sigkill;
-
- t = arg;
- t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2,
- t->flags, t->tramp_data);
- if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL);
- kill(os_getpid(), sig);
- _exit(0);
-}
-
-int start_fork_tramp(void *thread_arg, unsigned long temp_stack,
- int clone_flags, int (*tramp)(void *))
-{
- struct tramp arg;
- unsigned long sp;
- int new_pid, status, err;
-
- /* The trampoline will run on the temporary stack */
- sp = stack_sp(temp_stack);
-
- clone_flags |= CLONE_FILES | SIGCHLD;
-
- arg.tramp = tramp;
- arg.tramp_data = thread_arg;
- arg.temp_stack = temp_stack;
- arg.flags = clone_flags;
-
- /* Start the process and wait for it to kill itself */
- new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg);
- if(new_pid < 0)
- return(new_pid);
-
- CATCH_EINTR(err = waitpid(new_pid, &status, 0));
- if(err < 0)
- panic("Waiting for outer trampoline failed - errno = %d",
- errno);
-
- if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
- panic("outer trampoline didn't exit with SIGKILL, "
- "status = %d", status);
-
- return(arg.pid);
-}
-
-static int ptrace_child(void)
+static int ptrace_child(void *arg)
{
int ret;
int pid = os_getpid(), ppid = getppid();
@@ -159,36 +67,44 @@ static int ptrace_child(void)
_exit(ret);
}
-static int start_ptraced_child(void)
+/* Clone a child that runs ptrace_child() on a freshly mmap'ed one-page
+ * stack, wait until it has stopped itself with SIGSTOP and return its pid.
+ * The stack mapping is handed back through *stack_out; stop_ptraced_child()
+ * unmaps it.
+ */
+static int start_ptraced_child(void **stack_out)
{
+ void *stack;
+ unsigned long sp;
int pid, n, status;
-
- pid = fork();
- if(pid == 0)
- ptrace_child();
+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if(stack == MAP_FAILED)
+ panic("check_ptrace : mmap failed, errno = %d", errno);
+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
+ pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL);
if(pid < 0)
- panic("check_ptrace : fork failed, errno = %d", errno);
+ panic("start_ptraced_child : clone failed, errno = %d", errno);
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
if(n < 0)
panic("check_ptrace : wait failed, errno = %d", errno);
if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
panic("check_ptrace : expected SIGSTOP, got status = %d",
status);
+ *stack_out = stack;
return(pid);
}
-/* When testing for SYSEMU support, if it is one of the broken versions, we must
- * just avoid using sysemu, not panic, but only if SYSEMU features are broken.
+/* When testing for SYSEMU support, if it is one of the broken versions, we
+ * must just avoid using sysemu, not panic, but only if SYSEMU features are
+ * broken.
* So only for SYSEMU features we test mustpanic, while normal host features
- * must work anyway!*/
-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
+ * must work anyway!
+ */
+static int stop_ptraced_child(int pid, void *stack, int exitcode,
+ int mustpanic)
{
int status, n, ret = 0;
if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
- panic("stop_ptraced_child : ptrace failed, errno = %d", errno);
+ panic("check_ptrace : ptrace failed, errno = %d", errno);
CATCH_EINTR(n = waitpid(pid, &status, 0));
if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
int exit_with = WEXITSTATUS(status);
@@ -199,16 +115,32 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit)
printk("check_ptrace : child exited with exitcode %d, while "
"expecting %d; status 0x%x", exit_with,
exitcode, status);
- if (mustexit)
+ if (mustpanic)
panic("\n");
else
printk("\n");
ret = -1;
}
+ if(munmap(stack, PAGE_SIZE) < 0)
+ panic("check_ptrace : munmap failed, errno = %d", errno);
return ret;
}
+int ptrace_faultinfo = 1;
+int proc_mm = 1;
+
+static int __init skas0_cmd_param(char *str, int* add)
+{
+ ptrace_faultinfo = proc_mm = 0;
+ return 0;
+}
+
+__uml_setup("skas0", skas0_cmd_param,
+ "skas0\n"
+ " Disables SKAS3 usage, so that SKAS0 is used, unless \n"
+ " you specify mode=tt.\n\n");
+
static int force_sysemu_disabled = 0;
static int __init nosysemu_cmd_param(char *str, int* add)
@@ -218,20 +150,22 @@ static int __init nosysemu_cmd_param(char *str, int* add)
}
__uml_setup("nosysemu", nosysemu_cmd_param,
- "nosysemu\n"
- " Turns off syscall emulation patch for ptrace (SYSEMU) on.\n"
- " SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
- " behaviour of ptrace() and helps reducing host context switch rate.\n"
- " To make it working, you need a kernel patch for your host, too.\n"
- " See http://perso.wanadoo.fr/laurent.vivier/UML/ for further information.\n\n");
+"nosysemu\n"
+" Turns off syscall emulation patch for ptrace (SYSEMU) on.\n"
+" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
+" behaviour of ptrace() and helps reducing host context switch rate.\n"
+" To make it working, you need a kernel patch for your host, too.\n"
+" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
+" information.\n\n");
static void __init check_sysemu(void)
{
- int pid, syscall, n, status, count=0;
+ void *stack;
+ int pid, n, status, count=0;
printk("Checking syscall emulation patch for ptrace...");
sysemu_supported = 0;
- pid = start_ptraced_child();
+ pid = start_ptraced_child(&stack);
if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
goto fail;
@@ -249,7 +183,7 @@ static void __init check_sysemu(void)
panic("check_sysemu : failed to modify system "
"call return, errno = %d", errno);
- if (stop_ptraced_child(pid, 0, 0) < 0)
+ if (stop_ptraced_child(pid, stack, 0, 0) < 0)
goto fail_stopped;
sysemu_supported = 1;
@@ -257,7 +191,13 @@ static void __init check_sysemu(void)
set_using_sysemu(!force_sysemu_disabled);
printk("Checking advanced syscall emulation patch for ptrace...");
- pid = start_ptraced_child();
+ pid = start_ptraced_child(&stack);
+
+ if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+ (void *) PTRACE_O_TRACESYSGOOD) < 0)
+ panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d",
+ errno);
+
while(1){
count++;
if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
@@ -265,15 +205,10 @@ static void __init check_sysemu(void)
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
if(n < 0)
panic("check_ptrace : wait failed, errno = %d", errno);
- if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
- panic("check_ptrace : expected (SIGTRAP|SYSCALL_TRAP), "
- "got status = %d", status);
-
- syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
- 0);
- if(syscall == __NR_getpid){
+ if(WIFSTOPPED(status) && (WSTOPSIG(status) == (SIGTRAP|0x80))){
if (!count)
- panic("check_ptrace : SYSEMU_SINGLESTEP doesn't singlestep");
+ panic("check_ptrace : SYSEMU_SINGLESTEP "
+ "doesn't singlestep");
n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
os_getpid());
if(n < 0)
@@ -281,8 +216,13 @@ static void __init check_sysemu(void)
"call return, errno = %d", errno);
break;
}
+ else if(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP))
+ count++;
+ else
+ panic("check_ptrace : expected SIGTRAP or "
+ "(SIGTRAP|0x80), got status = %d", status);
}
- if (stop_ptraced_child(pid, 0, 0) < 0)
+ if (stop_ptraced_child(pid, stack, 0, 0) < 0)
goto fail_stopped;
sysemu_supported = 2;
@@ -293,32 +233,33 @@ static void __init check_sysemu(void)
return;
fail:
- stop_ptraced_child(pid, 1, 0);
+ stop_ptraced_child(pid, stack, 1, 0);
fail_stopped:
printk("missing\n");
}
-void __init check_ptrace(void)
+static void __init check_ptrace(void)
{
+ void *stack;
int pid, syscall, n, status;
printk("Checking that ptrace can change system call numbers...");
- pid = start_ptraced_child();
+ pid = start_ptraced_child(&stack);
- if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
- panic("check_ptrace: PTRACE_SETOPTIONS failed, errno = %d", errno);
+ if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
+ panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d", errno);
while(1){
if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
- panic("check_ptrace : ptrace failed, errno = %d",
+ panic("check_ptrace : ptrace failed, errno = %d",
errno);
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
if(n < 0)
panic("check_ptrace : wait failed, errno = %d", errno);
- if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP + 0x80))
- panic("check_ptrace : expected SIGTRAP + 0x80, "
+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != (SIGTRAP|0x80)))
+ panic("check_ptrace : expected (SIGTRAP|0x80), "
"got status = %d", status);
-
+
syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
0);
if(syscall == __NR_getpid){
@@ -330,77 +271,85 @@ void __init check_ptrace(void)
break;
}
}
- stop_ptraced_child(pid, 0, 1);
+ stop_ptraced_child(pid, stack, 0, 1);
printk("OK\n");
check_sysemu();
}
-int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr)
+void os_early_checks(void)
{
- sigjmp_buf buf;
- int n;
-
- *jmp_ptr = &buf;
- n = sigsetjmp(buf, 1);
- if(n != 0)
- return(n);
- (*fn)(arg);
- return(0);
+ check_ptrace();
}
-void forward_pending_sigio(int target)
+static int __init noprocmm_cmd_param(char *str, int* add)
{
- sigset_t sigs;
+ proc_mm = 0;
+ return 0;
+}
- if(sigpending(&sigs))
- panic("forward_pending_sigio : sigpending failed");
- if(sigismember(&sigs, SIGIO))
- kill(target, SIGIO);
+__uml_setup("noprocmm", noprocmm_cmd_param,
+"noprocmm\n"
+" Turns off usage of /proc/mm, even if host supports it.\n"
+" To support /proc/mm, the host needs to be patched using\n"
+" the current skas3 patch.\n\n");
+
+static int __init noptracefaultinfo_cmd_param(char *str, int* add)
+{
+ ptrace_faultinfo = 0;
+ return 0;
}
+__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param,
+"noptracefaultinfo\n"
+" Turns off usage of PTRACE_FAULTINFO, even if host supports\n"
+" it. To support PTRACE_FAULTINFO, the host needs to be patched\n"
+" using the current skas3 patch.\n\n");
+
#ifdef UML_CONFIG_MODE_SKAS
-static inline int check_skas3_ptrace_support(void)
+static inline void check_skas3_ptrace_support(void)
{
struct ptrace_faultinfo fi;
- int pid, n, ret = 1;
+ void *stack;
+ int pid, n;
printf("Checking for the skas3 patch in the host...");
- pid = start_ptraced_child();
+ pid = start_ptraced_child(&stack);
n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
if (n < 0) {
+ ptrace_faultinfo = 0;
if(errno == EIO)
printf("not found\n");
- else {
+ else
perror("not found");
- }
- ret = 0;
- } else {
- printf("found\n");
+ }
+ else {
+ if (!ptrace_faultinfo)
+ printf("found but disabled on command line\n");
+ else
+ printf("found\n");
}
init_registers(pid);
- stop_ptraced_child(pid, 1, 1);
-
- return(ret);
+ stop_ptraced_child(pid, stack, 1, 1);
}
int can_do_skas(void)
{
- int ret = 1;
-
printf("Checking for /proc/mm...");
if (os_access("/proc/mm", OS_ACC_W_OK) < 0) {
+ proc_mm = 0;
printf("not found\n");
- ret = 0;
- goto out;
- } else {
- printf("found\n");
+ }
+ else {
+ if (!proc_mm)
+ printf("found but disabled on command line\n");
+ else
+ printf("found\n");
}
- ret = check_skas3_ptrace_support();
-out:
- return ret;
+ check_skas3_ptrace_support();
+ return 1;
}
#else
int can_do_skas(void)
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
index 9a0ad094d926..3125d320722c 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/um/os-Linux/sys-i386/registers.c
@@ -121,6 +121,11 @@ void init_registers(int pid)
err);
}
+void get_safe_registers(unsigned long *regs)
+{
+ memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long));
+}
+
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
index 6286c974bbeb..44438d15c3d6 100644
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ b/arch/um/os-Linux/sys-x86_64/registers.c
@@ -69,6 +69,11 @@ void init_registers(int pid)
err);
}
+void get_safe_registers(unsigned long *regs)
+{
+ memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long));
+}
+
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c
new file mode 100644
index 000000000000..5b047ab8416a
--- /dev/null
+++ b/arch/um/os-Linux/tt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <sys/time.h>
+#include <sys/ptrace.h>
+#include <linux/ptrace.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "signal_kern.h"
+#include "signal_user.h"
+#include "sysdep/ptrace.h"
+#include "sysdep/sigcontext.h"
+#include "irq_user.h"
+#include "ptrace_user.h"
+#include "time_user.h"
+#include "init.h"
+#include "os.h"
+#include "uml-config.h"
+#include "choose-mode.h"
+#include "mode.h"
+#include "tempfile.h"
+
+/*
+ *-------------------------
+ * only for tt mode (will be deleted in future...)
+ *-------------------------
+ */
+
+struct tramp {
+ int (*tramp)(void *);
+ void *tramp_data;
+ unsigned long temp_stack;
+ int flags;
+ int pid;
+};
+
+/* See above for why sigkill is here */
+
+int sigkill = SIGKILL;
+
+int outer_tramp(void *arg)
+{
+ struct tramp *t;
+ int sig = sigkill;
+
+ t = arg;
+ t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2,
+ t->flags, t->tramp_data);
+ if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL);
+ kill(os_getpid(), sig);
+ _exit(0);
+}
+
+int start_fork_tramp(void *thread_arg, unsigned long temp_stack,
+ int clone_flags, int (*tramp)(void *))
+{
+ struct tramp arg;
+ unsigned long sp;
+ int new_pid, status, err;
+
+ /* The trampoline will run on the temporary stack */
+ sp = stack_sp(temp_stack);
+
+ clone_flags |= CLONE_FILES | SIGCHLD;
+
+ arg.tramp = tramp;
+ arg.tramp_data = thread_arg;
+ arg.temp_stack = temp_stack;
+ arg.flags = clone_flags;
+
+ /* Start the process and wait for it to kill itself */
+ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg);
+ if(new_pid < 0)
+ return(new_pid);
+
+ CATCH_EINTR(err = waitpid(new_pid, &status, 0));
+ if(err < 0)
+ panic("Waiting for outer trampoline failed - errno = %d",
+ errno);
+
+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
+ panic("outer trampoline didn't exit with SIGKILL, "
+ "status = %d", status);
+
+ return(arg.pid);
+}
+
+void forward_pending_sigio(int target)
+{
+ sigset_t sigs;
+
+ if(sigpending(&sigs))
+ panic("forward_pending_sigio : sigpending failed");
+ if(sigismember(&sigs, SIGIO))
+ kill(target, SIGIO);
+}
+
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 75d7af9ae1d2..56d3f870926b 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_PROTO(statfs64);
EXPORT_SYMBOL_PROTO(getuid);
+EXPORT_SYMBOL_PROTO(fsync);
+EXPORT_SYMBOL_PROTO(fdatasync);
+
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index 7459d09c233e..59a1291f477e 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -9,6 +9,11 @@ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
$(USER_OBJS) : c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \
$(CFLAGS_$(notdir $@))
+$(USER_OBJS): cmd_checksrc =
+$(USER_OBJS): quiet_cmd_checksrc =
+$(USER_OBJS): cmd_force_checksrc =
+$(USER_OBJS): quiet_cmd_force_checksrc =
+
# The stubs and unmap.o can't try to call mcount or update basic block data
define unprofile
@@ -16,6 +21,11 @@ define unprofile
endef
+# The stubs and unmap.o can't try to call mcount or update basic block data
+define unprofile
+ $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1)))
+endef
+
quiet_cmd_make_link = SYMLINK $@
cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@
diff --git a/arch/um/scripts/Makefile.unmap b/arch/um/scripts/Makefile.unmap
index 37a8f9765295..b2165188d942 100644
--- a/arch/um/scripts/Makefile.unmap
+++ b/arch/um/scripts/Makefile.unmap
@@ -12,8 +12,8 @@ $(obj)/unmap.o: _c_flags = $(call unprofile,$(CFLAGS))
quiet_cmd_wrapld = LD $@
define cmd_wrapld
- $(LD) -r -o $(obj)/unmap_tmp.o $< $(shell $(CC) -print-file-name=libc.a); \
- $(OBJCOPY) $(obj)/unmap_tmp.o $@ -G switcheroo
+ $(LD) $(LDFLAGS) -r -o $(obj)/unmap_tmp.o $< ; \
+ $(OBJCOPY) $(UML_OBJCOPYFLAGS) $(obj)/unmap_tmp.o $@ -G switcheroo
endef
$(obj)/unmap_fin.o : $(obj)/unmap.o FORCE
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 095bcdb0b9cc..4ca2a229da49 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -1,6 +1,6 @@
obj-y = bitops.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
- ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o \
- sys_call_table.o
+ ptrace_user.o semaphore.o signal.o sigcontext.o stub.o stub_segv.o \
+ syscalls.o sysrq.o sys_call_table.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_MODULES) += module.o
@@ -16,6 +16,8 @@ semaphore.c-dir = kernel
highmem.c-dir = mm
module.c-dir = kernel
+$(obj)/stub_segv.o : _c_flags = $(call unprofile,$(CFLAGS))
+
subdir- := util
include arch/um/scripts/Makefile.unmap
diff --git a/arch/um/sys-i386/kernel-offsets.c b/arch/um/sys-i386/kernel-offsets.c
index 9f8ecd1fdd96..a1070af2bcd8 100644
--- a/arch/um/sys-i386/kernel-offsets.c
+++ b/arch/um/sys-i386/kernel-offsets.c
@@ -2,6 +2,7 @@
#include <linux/stddef.h>
#include <linux/sched.h>
#include <linux/time.h>
+#include <linux/elf.h>
#include <asm/page.h>
#define DEFINE(sym, val) \
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index dc755b0b9db8..bd3c34aa52e5 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -4,96 +4,106 @@
*/
#include "linux/config.h"
+#include "linux/sched.h"
#include "linux/slab.h"
+#include "linux/types.h"
#include "asm/uaccess.h"
#include "asm/ptrace.h"
+#include "asm/smp.h"
+#include "asm/ldt.h"
#include "choose-mode.h"
#include "kern.h"
+#include "mode_kern.h"
#ifdef CONFIG_MODE_TT
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
-/* XXX this needs copy_to_user and copy_from_user */
+extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
-int sys_modify_ldt_tt(int func, void __user *ptr, unsigned long bytecount)
+static int do_modify_ldt_tt(int func, void *ptr, unsigned long bytecount)
{
- if (!access_ok(VERIFY_READ, ptr, bytecount))
- return -EFAULT;
-
return modify_ldt(func, ptr, bytecount);
}
+
#endif
#ifdef CONFIG_MODE_SKAS
-extern int userspace_pid[];
+#include "skas.h"
#include "skas_ptrace.h"
-int sys_modify_ldt_skas(int func, void __user *ptr, unsigned long bytecount)
+static int do_modify_ldt_skas(int func, void *ptr, unsigned long bytecount)
{
struct ptrace_ldt ldt;
- void *buf;
- int res, n;
+ u32 cpu;
+ int res;
- buf = kmalloc(bytecount, GFP_KERNEL);
- if(buf == NULL)
- return(-ENOMEM);
+ ldt = ((struct ptrace_ldt) { .func = func,
+ .ptr = ptr,
+ .bytecount = bytecount });
- res = 0;
+ cpu = get_cpu();
+ res = ptrace(PTRACE_LDT, userspace_pid[cpu], 0, (unsigned long) &ldt);
+ put_cpu();
+
+ return res;
+}
+#endif
+
+/*
+ * modify_ldt() entry point.  For write_ldt (func 1 / 0x11) the user's
+ * descriptor is copied into a local struct user_desc; for read_ldt
+ * (func 0 / 2) a bounce buffer of bytecount bytes is allocated.  The request
+ * is then passed to the tt or skas backend and, for reads, the returned
+ * number of bytes is copied back to user space.  Any other func yields
+ * -ENOSYS.
+ */
+int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+ struct user_desc info;
+ int res = 0;
+ void *buf = NULL;
+ void *p = NULL; /* What we pass to host. */
switch(func){
case 1:
- case 0x11:
- res = copy_from_user(buf, ptr, bytecount);
- break;
- }
+ case 0x11: /* write_ldt */
+ /* Do this check now to avoid overflows. */
+ if (bytecount != sizeof(struct user_desc)) {
+ res = -EINVAL;
+ goto out;
+ }
+
+ if(copy_from_user(&info, ptr, sizeof(info))) {
+ res = -EFAULT;
+ goto out;
+ }
- if(res != 0){
- res = -EFAULT;
+ p = &info;
+ break;
+ case 0:
+ case 2: /* read_ldt */
+
+ /* The use of info avoids kmalloc on the write case, not on the
+ * read one. */
+ buf = kmalloc(bytecount, GFP_KERNEL);
+ if (!buf) {
+ res = -ENOMEM;
+ goto out;
+ }
+ p = buf;
+ /* Must not fall through into default, which rejects the call. */
+ break;
+ default:
+ res = -ENOSYS;
goto out;
}
- ldt = ((struct ptrace_ldt) { .func = func,
- .ptr = buf,
- .bytecount = bytecount });
-#warning Need to look up userspace_pid by cpu
- res = ptrace(PTRACE_LDT, userspace_pid[0], 0, (unsigned long) &ldt);
+ res = CHOOSE_MODE_PROC(do_modify_ldt_tt, do_modify_ldt_skas, func,
+ p, bytecount);
if(res < 0)
goto out;
switch(func){
case 0:
case 2:
- n = res;
- res = copy_to_user(ptr, buf, n);
- if(res != 0)
+ /* Modify_ldt was for reading and returned the number of read
+ * bytes.*/
+ if(copy_to_user(ptr, p, res))
res = -EFAULT;
- else
- res = n;
break;
}
- out:
+out:
kfree(buf);
- return(res);
-}
-#endif
-
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
-{
- return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func,
- ptr, bytecount));
+ return res;
}
-
-
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 4efc69a039d7..16bc19928b3c 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -122,9 +122,9 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from,
int err;
to_fp = to->fpstate;
- from_fp = from->fpstate;
sigs = to->oldmask;
err = copy_from_user(to, from, sizeof(*to));
+ from_fp = to->fpstate;
to->oldmask = sigs;
to->fpstate = to_fp;
if(to_fp != NULL)
diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S
new file mode 100644
index 000000000000..6a70d9ab5c29
--- /dev/null
+++ b/arch/um/sys-i386/stub.S
@@ -0,0 +1,51 @@
+#include "uml-config.h"
+
+ .globl syscall_stub
+.section .__syscall_stub, "x"
+
+ .globl batch_syscall_stub
+batch_syscall_stub:
+ /* load pointer to first operation */
+ mov $(UML_CONFIG_STUB_DATA+8), %esp
+
+again:
+ /* load length of additional data */
+ mov 0x0(%esp), %eax
+
+ /* if(length == 0) : end of list */
+ /* write possible 0 to header */
+ mov %eax, UML_CONFIG_STUB_DATA+4
+ cmpl $0, %eax
+ jz done
+
+ /* save current pointer */
+ mov %esp, UML_CONFIG_STUB_DATA+4
+
+ /* skip additional data */
+ add %eax, %esp
+
+ /* load syscall-# */
+ pop %eax
+
+ /* load syscall params */
+ pop %ebx
+ pop %ecx
+ pop %edx
+ pop %esi
+ pop %edi
+ pop %ebp
+
+ /* execute syscall */
+ int $0x80
+
+ /* check return value */
+ pop %ebx
+ cmp %ebx, %eax
+ je again
+
+done:
+ /* save return value */
+ mov %eax, UML_CONFIG_STUB_DATA
+
+ /* stop */
+ int3
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
new file mode 100644
index 000000000000..1e88b275edac
--- /dev/null
+++ b/arch/um/sys-i386/stub_segv.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <asm/signal.h>
+#include <asm/unistd.h>
+#include "uml-config.h"
+#include "sysdep/sigcontext.h"
+#include "sysdep/faultinfo.h"
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_segv_handler(int sig)
+{
+ struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
+
+ GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA),
+ sc);
+
+ __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid));
+ __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;"
+ "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1));
+ /* Load pointer to sigcontext into esp, since we need to leave
+ * the stack in its original form when we do the sigreturn here, by
+ * hand.
+ */
+ __asm__("mov %0,%%esp ; movl %1, %%eax ; "
+ "int $0x80" : : "a" (sc), "g" (__NR_sigreturn));
+}
diff --git a/arch/um/sys-i386/unmap.c b/arch/um/sys-i386/unmap.c
index 136875263d27..1b0ad0e4adcd 100644
--- a/arch/um/sys-i386/unmap.c
+++ b/arch/um/sys-i386/unmap.c
@@ -15,7 +15,7 @@ int switcheroo(int fd, int prot, void *from, void *to, int size)
if(munmap(to, size) < 0){
return(-1);
}
- if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){
+ if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
return(-1);
}
if(munmap(from, size) < 0){
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 2bc6f6849010..f0ab574d1e95 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -6,8 +6,8 @@
#XXX: why into lib-y?
lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \
- ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \
- syscalls.o sysrq.o thunk.o syscall_table.o
+ ptrace.o ptrace_user.o sigcontext.o signal.o stub.o \
+ stub_segv.o syscalls.o syscall_table.o sysrq.o thunk.o
obj-y := ksyms.o
obj-$(CONFIG_MODULES) += module.o um_module.o
@@ -15,7 +15,7 @@ obj-$(CONFIG_MODULES) += module.o um_module.o
USER_OBJS := ptrace_user.o sigcontext.o
SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \
- semaphore.c thunk.S module.c
+ thunk.S module.c
include arch/um/scripts/Makefile.rules
@@ -24,10 +24,11 @@ csum-copy.S-dir = lib
csum-partial.c-dir = lib
csum-wrappers.c-dir = lib
memcpy.S-dir = lib
-semaphore.c-dir = kernel
thunk.S-dir = lib
module.c-dir = kernel
+$(obj)/stub_segv.o: _c_flags = $(call unprofile,$(CFLAGS))
+
subdir- := util
include arch/um/scripts/Makefile.unmap
diff --git a/arch/um/sys-x86_64/kernel-offsets.c b/arch/um/sys-x86_64/kernel-offsets.c
index 220e875cbe29..998541eade41 100644
--- a/arch/um/sys-x86_64/kernel-offsets.c
+++ b/arch/um/sys-x86_64/kernel-offsets.c
@@ -2,6 +2,7 @@
#include <linux/stddef.h>
#include <linux/sched.h>
#include <linux/time.h>
+#include <linux/elf.h>
#include <asm/page.h>
#define DEFINE(sym, val) \
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index 73a7926f7370..fe1d065332b1 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -104,28 +104,35 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp,
int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from,
int fpsize)
{
- struct _fpstate *to_fp, *from_fp;
- unsigned long sigs;
- int err;
-
- to_fp = to->fpstate;
- from_fp = from->fpstate;
- sigs = to->oldmask;
- err = copy_from_user(to, from, sizeof(*to));
- to->oldmask = sigs;
- return(err);
+ struct _fpstate *to_fp, *from_fp;
+ unsigned long sigs;
+ int err;
+
+ to_fp = to->fpstate;
+ sigs = to->oldmask;
+ err = copy_from_user(to, from, sizeof(*to));
+ from_fp = to->fpstate;
+ to->fpstate = to_fp;
+ to->oldmask = sigs;
+ if(to_fp != NULL)
+ err |= copy_from_user(to_fp, from_fp, fpsize);
+ return(err);
}
int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp,
struct sigcontext *from, int fpsize)
{
- struct _fpstate *to_fp, *from_fp;
- int err;
-
- to_fp = (fp ? fp : (struct _fpstate *) (to + 1));
- from_fp = from->fpstate;
- err = copy_to_user(to, from, sizeof(*to));
- return(err);
+ struct _fpstate *to_fp, *from_fp;
+ int err;
+
+ to_fp = (fp ? fp : (struct _fpstate *) (to + 1));
+ from_fp = from->fpstate;
+ err = copy_to_user(to, from, sizeof(*to));
+ if(from_fp != NULL){
+ err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate));
+ err |= copy_to_user(to_fp, from_fp, fpsize);
+ }
+ return(err);
}
#endif
@@ -168,7 +175,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
frame = (struct rt_sigframe __user *)
round_down(stack_top - sizeof(struct rt_sigframe), 16) - 8;
- ((unsigned char *) frame) -= 128;
+ frame = (struct rt_sigframe *) ((unsigned long) frame - 128);
if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
goto out;
diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S
new file mode 100644
index 000000000000..03c279735784
--- /dev/null
+++ b/arch/um/sys-x86_64/stub.S
@@ -0,0 +1,66 @@
+#include "uml-config.h"
+
+ .globl syscall_stub
+.section .__syscall_stub, "x"
+syscall_stub: /* execute one syscall, store its result at UML_CONFIG_STUB_DATA, then trap */
+ syscall /* registers are expected to be loaded already; nothing is set up here */
+ /* We don't have 64-bit constants, so this constructs the address
+ * we need.
+ */
+ movq $(UML_CONFIG_STUB_DATA >> 32), %rbx /* high 32 bits of the data address */
+ salq $32, %rbx /* shift them into the upper half of %rbx */
+ movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx /* low 32 bits */
+ or %rcx, %rbx /* %rbx = UML_CONFIG_STUB_DATA */
+ movq %rax, (%rbx) /* syscall return value goes to offset 0 of the data area */
+ int3 /* breakpoint trap; presumably returns control to whoever is tracing the stub - confirm */
+
+ .globl batch_syscall_stub
+batch_syscall_stub: /* run a list of syscall records stored in the stub data area, stopping at the first unexpected result or at the end of the list */
+ mov $(UML_CONFIG_STUB_DATA >> 32), %rbx /* build the 64-bit data address in two 32-bit halves */
+ sal $32, %rbx
+ mov $(UML_CONFIG_STUB_DATA & 0xffffffff), %rax
+ or %rax, %rbx /* %rbx = UML_CONFIG_STUB_DATA */
+ /* load pointer to first operation */
+ mov %rbx, %rsp /* %rsp is used as a read cursor so the pops below walk the record */
+ add $0x10, %rsp /* records begin 0x10 bytes in; offsets 0 and 8 are written as header words below */
+again:
+ /* load length of additional data */
+ mov 0x0(%rsp), %rax
+
+ /* if(length == 0) : end of list */
+ /* write possible 0 to header */
+ mov %rax, 8(%rbx) /* header word at offset 8 becomes 0 when the list ends here */
+ cmp $0, %rax
+ jz done /* %rax is 0 on this path, so 0 is what gets stored as the result */
+
+ /* save current pointer */
+ mov %rsp, 8(%rbx) /* header word at offset 8 = address of the record being executed */
+
+ /* skip additional data */
+ add %rax, %rsp /* cursor was not moved past the length word first, so the length is measured from the length word itself */
+
+ /* load syscall-# */
+ pop %rax
+
+ /* load syscall params */
+ pop %rdi /* the six argument registers, popped in record order */
+ pop %rsi
+ pop %rdx
+ pop %r10
+ pop %r8
+ pop %r9
+
+ /* execute syscall */
+ syscall
+
+ /* check return value */
+ pop %rcx /* next word in the record is the expected return value */
+ cmp %rcx, %rax
+ je again /* continue only while results match; otherwise fall through with the actual result in %rax */
+
+done:
+ /* save return value */
+ mov %rax, (%rbx) /* offset 0 of the data area: 0 at end of list, else the mismatching result */
+
+ /* stop */
+ int3
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c
new file mode 100644
index 000000000000..65a131b362b6
--- /dev/null
+++ b/arch/um/sys-x86_64/stub_segv.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <asm/signal.h>
+#include <linux/compiler.h>
+#include <asm/unistd.h>
+#include <asm/ucontext.h>
+#include "uml-config.h"
+#include "sysdep/sigcontext.h"
+#include "sysdep/faultinfo.h"
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_segv_handler(int sig)
+{
+ struct ucontext *uc;
+ /* Handler placed in .__syscall_stub: stores fault info at UML_CONFIG_STUB_DATA, sends SIGUSR1 to its own pid, then issues rt_sigreturn. */
+ __asm__("movq %%rdx, %0" : "=g" (uc) :); /* treats %rdx as the ucontext pointer; presumably the third handler argument under SA_SIGINFO delivery - confirm */
+ GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA),
+ &uc->uc_mcontext); /* macro defined elsewhere; by its arguments it fills the faultinfo at UML_CONFIG_STUB_DATA from the saved context - confirm */
+
+ __asm__("movq %0, %%rax ; syscall": : "g" (__NR_getpid)); /* getpid(); the result is left in %rax for the next statement */
+ __asm__("movq %%rax, %%rdi ; movq %0, %%rax ; movq %1, %%rsi ;"
+ "syscall": : "g" (__NR_kill), "g" (SIGUSR1)); /* kill(pid, SIGUSR1); NOTE(review): no clobbers or volatile declared, so this relies on %rax surviving between the two asm statements */
+ /* Two popqs to restore the stack to the state just before entering
+ * the handler, one pops the return address, the other pops the frame
+ * pointer.
+ */
+ __asm__("popq %%rax ; popq %%rax ; movq %0, %%rax ; syscall" : : "g"
+ (__NR_rt_sigreturn)); /* NOTE(review): the two pops assume a frame-pointer prologue that pushed nothing else - confirm the build flags */
+}
diff --git a/arch/um/sys-x86_64/unmap.c b/arch/um/sys-x86_64/unmap.c
index bc7094cce47e..f4a4bffd8a18 100644
--- a/arch/um/sys-x86_64/unmap.c
+++ b/arch/um/sys-x86_64/unmap.c
@@ -15,7 +15,7 @@ int switcheroo(int fd, int prot, void *from, void *to, int size)
if(munmap(to, size) < 0){
return(-1);
}
- if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){
+ if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
return(-1);
}
if(munmap(from, size) < 0){