diff options
Diffstat (limited to 'arch/s390/kernel')
28 files changed, 1385 insertions, 1201 deletions
| diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7d9ec924e7e7..16b0b433f1f4 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w  obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \  	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \  	    debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \ -	    sysinfo.o jump_label.o +	    sysinfo.o jump_label.o lgr.o os_info.o  obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)  obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o) @@ -34,8 +34,6 @@ extra-y				+= $(if $(CONFIG_64BIT),head64.o,head31.o)  obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o  obj-$(CONFIG_SMP)		+= smp.o  obj-$(CONFIG_SCHED_BOOK)	+= topology.o -obj-$(CONFIG_SMP)		+= $(if $(CONFIG_64BIT),switch_cpu64.o, \ -							switch_cpu.o)  obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp_asm64.o  obj-$(CONFIG_AUDIT)		+= audit.o  compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 6e6a72e66d60..ed8c913db79e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -8,9 +8,11 @@  #include <linux/kbuild.h>  #include <linux/sched.h> +#include <asm/cputime.h> +#include <asm/timer.h>  #include <asm/vdso.h> -#include <asm/sigp.h>  #include <asm/pgtable.h> +#include <asm/system.h>  /*   * Make sure that the compiler is new enough. 
We want a compiler that @@ -70,15 +72,15 @@ int main(void)  	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);  	DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);  	BLANK(); -	/* constants for SIGP */ -	DEFINE(__SIGP_STOP, sigp_stop); -	DEFINE(__SIGP_RESTART, sigp_restart); -	DEFINE(__SIGP_SENSE, sigp_sense); -	DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset); -	BLANK(); +	/* idle data offsets */ +	DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter)); +	DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit)); +	/* vtimer queue offsets */ +	DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter)); +	DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit));  	/* lowcore offsets */  	DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); -	DEFINE(__LC_CPU_ADDRESS, offsetof(struct _lowcore, cpu_addr)); +	DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));  	DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));  	DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));  	DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); @@ -95,20 +97,19 @@ int main(void)  	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));  	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));  	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); -	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); -	BLANK(); -	DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));  	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));  	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));  	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));  	DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));  	DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));  	DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); +	DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, 
restart_psw));  	DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));  	DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));  	DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));  	DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));  	DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); +	BLANK();  	DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));  	DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));  	DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); @@ -129,12 +130,16 @@ int main(void)  	DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));  	DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));  	DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); +	DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); +	DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));  	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));  	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));  	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));  	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));  	DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));  	DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); +	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); +	BLANK();  	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));  	DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));  	DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6fe78c2f95d9..53a82c8d50e9 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -581,7 +581,6 @@ give_sigsegv:  int handle_signal32(unsigned long sig, struct k_sigaction *ka,  		    siginfo_t 
*info, sigset_t *oldset, struct pt_regs *regs)  { -	sigset_t blocked;  	int ret;  	/* Set up the stack frame */ @@ -591,10 +590,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,  		ret = setup_frame32(sig, ka, oldset, regs);  	if (ret)  		return ret; -	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask); -	if (!(ka->sa.sa_flags & SA_NODEFER)) -		sigaddset(&blocked, sig); -	set_current_blocked(&blocked); +	block_sigmask(ka, sig);  	return 0;  } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 39f8fd4438fc..cc1172b26873 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -11,10 +11,10 @@  #include <linux/module.h>  #include <linux/gfp.h>  #include <linux/slab.h> -#include <linux/crash_dump.h>  #include <linux/bootmem.h>  #include <linux/elf.h>  #include <asm/ipl.h> +#include <asm/os_info.h>  #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))  #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) @@ -52,7 +52,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,  /*   * Copy memory from old kernel   */ -static int copy_from_oldmem(void *dest, void *src, size_t count) +int copy_from_oldmem(void *dest, void *src, size_t count)  {  	unsigned long copied = 0;  	int rc; @@ -225,28 +225,44 @@ static void *nt_prpsinfo(void *ptr)  }  /* - * Initialize vmcoreinfo note (new kernel) + * Get vmcoreinfo using lowcore->vmcore_info (new kernel)   */ -static void *nt_vmcoreinfo(void *ptr) +static void *get_vmcoreinfo_old(unsigned long *size)  {  	char nt_name[11], *vmcoreinfo;  	Elf64_Nhdr note;  	void *addr;  	if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) -		return ptr; +		return NULL;  	memset(nt_name, 0, sizeof(nt_name));  	if (copy_from_oldmem(&note, addr, sizeof(note))) -		return ptr; +		return NULL;  	if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) -		return ptr; +		return NULL;  	if (strcmp(nt_name, "VMCOREINFO") != 0) -		return 
ptr; -	vmcoreinfo = kzalloc_panic(note.n_descsz + 1); +		return NULL; +	vmcoreinfo = kzalloc_panic(note.n_descsz);  	if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) +		return NULL; +	*size = note.n_descsz; +	return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ +	unsigned long size; +	void *vmcoreinfo; + +	vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); +	if (!vmcoreinfo) +		vmcoreinfo = get_vmcoreinfo_old(&size); +	if (!vmcoreinfo)  		return ptr; -	vmcoreinfo[note.n_descsz + 1] = 0; -	return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +	return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");  }  /* diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6848828b962e..19e5e9eba546 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,8 +2,8 @@   *  arch/s390/kernel/debug.c   *   S/390 debug facility   * - *    Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - *                             IBM Corporation + *    Copyright IBM Corp. 
1999, 2012 + *   *    Author(s): Michael Holzheu (holzheu@de.ibm.com),   *               Holger Smolinski (Holger.Smolinski@de.ibm.com)   * @@ -167,6 +167,7 @@ static debug_info_t *debug_area_last = NULL;  static DEFINE_MUTEX(debug_mutex);  static int initialized; +static int debug_critical;  static const struct file_operations debug_file_ops = {  	.owner   = THIS_MODULE, @@ -932,6 +933,11 @@ debug_stop_all(void)  } +void debug_set_critical(void) +{ +	debug_critical = 1; +} +  /*   * debug_event_common:   * - write debug entry with given size @@ -945,7 +951,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len)  	if (!debug_active || !id->areas)  		return NULL; -	spin_lock_irqsave(&id->lock, flags); +	if (debug_critical) { +		if (!spin_trylock_irqsave(&id->lock, flags)) +			return NULL; +	} else +		spin_lock_irqsave(&id->lock, flags);  	active = get_active_entry(id);  	memset(DEBUG_DATA(active), 0, id->buf_size);  	memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -968,7 +978,11 @@ debug_entry_t  	if (!debug_active || !id->areas)  		return NULL; -	spin_lock_irqsave(&id->lock, flags); +	if (debug_critical) { +		if (!spin_trylock_irqsave(&id->lock, flags)) +			return NULL; +	} else +		spin_lock_irqsave(&id->lock, flags);  	active = get_active_entry(id);  	memset(DEBUG_DATA(active), 0, id->buf_size);  	memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -1013,7 +1027,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)  		return NULL;  	numargs=debug_count_numargs(string); -	spin_lock_irqsave(&id->lock, flags); +	if (debug_critical) { +		if (!spin_trylock_irqsave(&id->lock, flags)) +			return NULL; +	} else +		spin_lock_irqsave(&id->lock, flags);  	active = get_active_entry(id);  	curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);  	va_start(ap,string); @@ -1047,7 +1065,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)  	
numargs=debug_count_numargs(string); -	spin_lock_irqsave(&id->lock, flags); +	if (debug_critical) { +		if (!spin_trylock_irqsave(&id->lock, flags)) +			return NULL; +	} else +		spin_lock_irqsave(&id->lock, flags);  	active = get_active_entry(id);  	curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);  	va_start(ap,string); @@ -1428,10 +1450,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,  	rc += sprintf(out_buf + rc, "| ");  	for (i = 0; i < id->buf_size; i++) {  		unsigned char c = in_buf[i]; -		if (!isprint(c)) -			rc += sprintf(out_buf + rc, "."); -		else +		if (isascii(c) && isprint(c))  			rc += sprintf(out_buf + rc, "%c", c); +		else +			rc += sprintf(out_buf + rc, ".");  	}  	rc += sprintf(out_buf + rc, "\n");  	return rc; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 52098d6dfaa7..578eb4e6d157 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -29,6 +29,7 @@  #include <asm/sysinfo.h>  #include <asm/cpcmd.h>  #include <asm/sclp.h> +#include <asm/system.h>  #include "entry.h"  /* @@ -262,25 +263,8 @@ static noinline __init void setup_lowcore_early(void)  static noinline __init void setup_facility_list(void)  { -	unsigned long nr; - -	S390_lowcore.stfl_fac_list = 0; -	asm volatile( -		"	.insn	s,0xb2b10000,0(0)\n" /* stfl */ -		"0:\n" -		EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); -	memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); -	nr = 4;				/* # bytes stored by stfl */ -	if (test_facility(7)) { -		/* More facility bits available with stfle */ -		register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; -		asm volatile(".insn s,0xb2b00000,%0" /* stfle */ -			     : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) -			     : : "cc"); -		nr = (reg0 + 1) * 8;	/* # bytes stored by stfle */ -	} -	memset((char *) S390_lowcore.stfle_fac_list + nr, 0, -	       MAX_FACILITY_BIT/8 - nr); +	stfle(S390_lowcore.stfle_fac_list, +	      
ARRAY_SIZE(S390_lowcore.stfle_fac_list));  }  static noinline __init void setup_hpage(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3705700ed374..74ee563fe62b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -2,7 +2,7 @@   *  arch/s390/kernel/entry.S   *    S390 low-level entry points.   * - *    Copyright (C) IBM Corp. 1999,2006 + *    Copyright (C) IBM Corp. 1999,2012   *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),   *		 Hartmut Penner (hp@de.ibm.com),   *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -105,14 +105,14 @@ STACK_SIZE  = 1 << STACK_SHIFT  	.macro	ADD64 high,low,timer  	al	\high,\timer -	al	\low,\timer+4 +	al	\low,4+\timer  	brc	12,.+8  	ahi	\high,1  	.endm  	.macro	SUB64 high,low,timer  	sl	\high,\timer -	sl	\low,\timer+4 +	sl	\low,4+\timer  	brc	3,.+8  	ahi	\high,-1  	.endm @@ -471,7 +471,6 @@ io_tif:  	jnz	io_work			# there is work to do (signals etc.)  io_restore:  	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11) -	ni	__LC_RETURN_PSW+1,0xfd	# clean wait state bit  	stpt	__LC_EXIT_TIMER  	lm	%r0,%r15,__PT_R0(%r11)  	lpsw	__LC_RETURN_PSW @@ -606,12 +605,32 @@ ext_skip:  	stm	%r8,%r9,__PT_PSW(%r11)  	TRACE_IRQS_OFF  	lr	%r2,%r11		# pass pointer to pt_regs -	l	%r3,__LC_CPU_ADDRESS	# get cpu address + interruption code +	l	%r3,__LC_EXT_CPU_ADDR	# get cpu address + interruption code  	l	%r4,__LC_EXT_PARAMS	# get external parameters  	l	%r1,BASED(.Ldo_extint)  	basr	%r14,%r1		# call do_extint  	j	io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. 
+ */ +ENTRY(psw_idle) +	st	%r4,__SF_EMPTY(%r15) +	basr	%r1,0 +	la	%r1,psw_idle_lpsw+4-.(%r1) +	st	%r1,__SF_EMPTY+4(%r15) +	oi	__SF_EMPTY+4(%r15),0x80 +	la	%r1,.Lvtimer_max-psw_idle_lpsw-4(%r1) +	stck	__IDLE_ENTER(%r2) +	ltr	%r5,%r5 +	stpt	__VQ_IDLE_ENTER(%r3) +	jz	psw_idle_lpsw +	spt	0(%r1) +psw_idle_lpsw: +	lpsw	__SF_EMPTY(%r15) +	br	%r14 +psw_idle_end: +  __critical_end:  /* @@ -673,7 +692,6 @@ mcck_skip:  	TRACE_IRQS_ON  mcck_return:  	mvc	__LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW -	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit  	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?  	jno	0f  	lm	%r0,%r15,__PT_R0(%r11) @@ -691,77 +709,30 @@ mcck_panic:  0:	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)  	j	mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP -	__CPUINIT -ENTRY(restart_int_handler) -	basr	%r1,0 -restart_base: -	spt	restart_vtime-restart_base(%r1) -	stck	__LC_LAST_UPDATE_CLOCK -	mvc	__LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) -	mvc	__LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) -	l	%r15,__LC_GPREGS_SAVE_AREA+60 # load ksp -	lctl	%c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs -	lam	%a0,%a15,__LC_AREGS_SAVE_AREA -	lm	%r6,%r15,__SF_GPRS(%r15)# load registers from clone -	l	%r1,__LC_THREAD_INFO -	mvc	__LC_USER_TIMER(8),__TI_user_timer(%r1) -	mvc	__LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) -	xc	__LC_STEAL_TIMER(8),__LC_STEAL_TIMER -	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off -	basr	%r14,0 -	l	%r14,restart_addr-.(%r14) -	basr	%r14,%r14		# call start_secondary -restart_addr: -	.long	start_secondary -	.align	8 -restart_vtime: -	.long	0x7fffffff,0xffffffff -	.previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... 
- */ -ENTRY(restart_int_handler) -	basr	%r1,0 -restart_base: -	lpsw	restart_crash-restart_base(%r1) -	.align	8 -restart_crash: -	.long	0x000a0000,0x00000000 -restart_go: -#endif -  #  # PSW restart interrupt handler  # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler)  	st	%r15,__LC_SAVE_AREA_RESTART -	basr	%r15,0 -0:	l	%r15,.Lrestart_stack-0b(%r15)	# load restart stack -	l	%r15,0(%r15) +	l	%r15,__LC_RESTART_STACK  	ahi	%r15,-__PT_SIZE			# create pt_regs on stack +	xc	0(__PT_SIZE,%r15),0(%r15)  	stm	%r0,%r14,__PT_R0(%r15)  	mvc	__PT_R15(4,%r15),__LC_SAVE_AREA_RESTART  	mvc	__PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw -	ahi	%r15,-STACK_FRAME_OVERHEAD -	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) -	basr	%r14,0 -1:	l	%r14,.Ldo_restart-1b(%r14) -	basr	%r14,%r14 -	basr	%r14,0				# load disabled wait PSW if -2:	lpsw	restart_psw_crash-2b(%r14)	# do_restart returns -	.align 4 -.Ldo_restart: -	.long	do_restart -.Lrestart_stack: -	.long	restart_stack -	.align 8 -restart_psw_crash: -	.long	0x000a0000,0x00000000 + restart_psw_crash +	ahi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack +	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15) +	lm	%r1,%r3,__LC_RESTART_FN		# load fn, parm & source cpu +	ltr	%r3,%r3				# test source cpu address +	jm	1f				# negative -> skip source stop +0:	sigp	%r4,%r3,1			# sigp sense to source cpu +	brc	10,0b				# wait for status stored +1:	basr	%r14,%r1			# call function +	stap	__SF_EMPTY(%r15)		# store cpu address +	lh	%r3,__SF_EMPTY(%r15) +2:	sigp	%r4,%r3,5			# sigp stop to current cpu +	brc	2,2b +3:	j	3b  	.section .kprobes.text, "ax" @@ -795,6 +766,8 @@ cleanup_table:  	.long	io_tif + 0x80000000  	.long	io_restore + 0x80000000  	.long	io_done + 0x80000000 +	.long	psw_idle + 0x80000000 +	.long	psw_idle_end + 0x80000000  cleanup_critical:  	cl	%r9,BASED(cleanup_table)	# system_call @@ -813,6 +786,10 @@ cleanup_critical:  	jl	cleanup_io_tif  	cl	%r9,BASED(cleanup_table+28)	# io_done  	jl	cleanup_io_restore +	cl	
%r9,BASED(cleanup_table+32)	# psw_idle +	jl	0f +	cl	%r9,BASED(cleanup_table+36)	# psw_idle_end +	jl	cleanup_idle  0:	br	%r14  cleanup_system_call: @@ -896,7 +873,6 @@ cleanup_io_restore:  	jhe	0f  	l	%r9,12(%r11)		# get saved r11 pointer to pt_regs  	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9) -	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit  	mvc	0(32,%r11),__PT_R8(%r9)  	lm	%r0,%r7,__PT_R0(%r9)  0:	lm	%r8,%r9,__LC_RETURN_PSW @@ -904,11 +880,52 @@ cleanup_io_restore:  cleanup_io_restore_insn:  	.long	io_done - 4 + 0x80000000 +cleanup_idle: +	# copy interrupt clock & cpu timer +	mvc	__IDLE_EXIT(8,%r2),__LC_INT_CLOCK +	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER +	chi	%r11,__LC_SAVE_AREA_ASYNC +	je	0f +	mvc	__IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK +	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0:	# check if stck has been executed +	cl	%r9,BASED(cleanup_idle_insn) +	jhe	1f +	mvc	__IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) +	mvc	__VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) +	j	2f +1:	# check if the cpu timer has been reprogrammed +	ltr	%r5,%r5 +	jz	2f +	spt	__VQ_IDLE_ENTER(%r3) +2:	# account system time going idle +	lm	%r9,%r10,__LC_STEAL_TIMER +	ADD64	%r9,%r10,__IDLE_ENTER(%r2) +	SUB64	%r9,%r10,__LC_LAST_UPDATE_CLOCK +	stm	%r9,%r10,__LC_STEAL_TIMER +	mvc	__LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) +	lm	%r9,%r10,__LC_SYSTEM_TIMER +	ADD64	%r9,%r10,__LC_LAST_UPDATE_TIMER +	SUB64	%r9,%r10,__VQ_IDLE_ENTER(%r3) +	stm	%r9,%r10,__LC_SYSTEM_TIMER +	mvc	__LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) +	# prepare return psw +	n	%r8,BASED(cleanup_idle_wait)	# clear wait state bit +	l	%r9,24(%r11)			# return from psw_idle +	br	%r14 +cleanup_idle_insn: +	.long	psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: +	.long	0xfffdffff +  /*   * Integer constants   */  	.align	4 -.Lnr_syscalls:		.long	NR_syscalls +.Lnr_syscalls: +	.long	NR_syscalls +.Lvtimer_max: +	.quad	0x7fffffffffffffff  /*   * Symbol constants diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 
bf538aaf407d..6cdddac93a2e 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,11 +4,22 @@  #include <linux/types.h>  #include <linux/signal.h>  #include <asm/ptrace.h> - +#include <asm/cputime.h> +#include <asm/timer.h>  extern void (*pgm_check_table[128])(struct pt_regs *);  extern void *restart_stack; +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, struct vtimer_queue *, +	      unsigned long, int); +  asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);  asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); @@ -24,9 +35,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,  		    siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);  void do_notify_resume(struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +struct ext_code; +void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long);  void do_restart(void); -int __cpuinit start_secondary(void *cpuvoid);  void __init startup_init(void);  void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 412a7b8783d7..4e1c292fa7e3 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -2,7 +2,7 @@   *  arch/s390/kernel/entry64.S   *    S390 low-level entry points.   * - *    Copyright (C) IBM Corp. 1999,2010 + *    Copyright (C) IBM Corp. 
1999,2012   *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),   *		 Hartmut Penner (hp@de.ibm.com),   *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -489,7 +489,6 @@ io_restore:  	lg	%r14,__LC_VDSO_PER_CPU  	lmg	%r0,%r10,__PT_R0(%r11)  	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11) -	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit  	stpt	__LC_EXIT_TIMER  	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER  	lmg	%r11,%r15,__PT_R11(%r11) @@ -625,12 +624,30 @@ ext_skip:  	TRACE_IRQS_OFF  	lghi	%r1,4096  	lgr	%r2,%r11		# pass pointer to pt_regs -	llgf	%r3,__LC_CPU_ADDRESS	# get cpu address + interruption code +	llgf	%r3,__LC_EXT_CPU_ADDR	# get cpu address + interruption code  	llgf	%r4,__LC_EXT_PARAMS	# get external parameter  	lg	%r5,__LC_EXT_PARAMS2-4096(%r1)	# get 64 bit external parameter  	brasl	%r14,do_extint  	j	io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) +	stg	%r4,__SF_EMPTY(%r15) +	larl	%r1,psw_idle_lpsw+4 +	stg	%r1,__SF_EMPTY+8(%r15) +	larl	%r1,.Lvtimer_max +	stck	__IDLE_ENTER(%r2) +	ltr	%r5,%r5 +	stpt	__VQ_IDLE_ENTER(%r3) +	jz	psw_idle_lpsw +	spt	0(%r1) +psw_idle_lpsw: +	lpswe	__SF_EMPTY(%r15) +	br	%r14 +psw_idle_end: +  __critical_end:  /* @@ -696,7 +713,6 @@ mcck_return:  	lg	%r14,__LC_VDSO_PER_CPU  	lmg	%r0,%r10,__PT_R0(%r11)  	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW -	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit  	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?  	
jno	0f  	stpt	__LC_EXIT_TIMER @@ -713,68 +729,30 @@ mcck_panic:  0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)  	j	mcck_skip -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP -	__CPUINIT -ENTRY(restart_int_handler) -	basr	%r1,0 -restart_base: -	spt	restart_vtime-restart_base(%r1) -	stck	__LC_LAST_UPDATE_CLOCK -	mvc	__LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) -	mvc	__LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) -	lghi	%r10,__LC_GPREGS_SAVE_AREA -	lg	%r15,120(%r10)		# load ksp -	lghi	%r10,__LC_CREGS_SAVE_AREA -	lctlg	%c0,%c15,0(%r10)	# get new ctl regs -	lghi	%r10,__LC_AREGS_SAVE_AREA -	lam	%a0,%a15,0(%r10) -	lmg	%r6,%r15,__SF_GPRS(%r15)# load registers from clone -	lg	%r1,__LC_THREAD_INFO -	mvc	__LC_USER_TIMER(8),__TI_user_timer(%r1) -	mvc	__LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) -	xc	__LC_STEAL_TIMER(8),__LC_STEAL_TIMER -	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off -	brasl	%r14,start_secondary -	.align	8 -restart_vtime: -	.long	0x7fffffff,0xffffffff -	.previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... 
- */ -ENTRY(restart_int_handler) -	basr	%r1,0 -restart_base: -	lpswe	restart_crash-restart_base(%r1) -	.align 8 -restart_crash: -	.long  0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif -  #  # PSW restart interrupt handler  # -ENTRY(psw_restart_int_handler) +ENTRY(restart_int_handler)  	stg	%r15,__LC_SAVE_AREA_RESTART -	larl	%r15,restart_stack		# load restart stack -	lg	%r15,0(%r15) +	lg	%r15,__LC_RESTART_STACK  	aghi	%r15,-__PT_SIZE			# create pt_regs on stack +	xc	0(__PT_SIZE,%r15),0(%r15)  	stmg	%r0,%r14,__PT_R0(%r15)  	mvc	__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART  	mvc	__PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw -	aghi	%r15,-STACK_FRAME_OVERHEAD -	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -	brasl	%r14,do_restart -	larl	%r14,restart_psw_crash		# load disabled wait PSW if -	lpswe	0(%r14)				# do_restart returns -	.align 8 -restart_psw_crash: -	.quad	0x0002000080000000,0x0000000000000000 + restart_psw_crash +	aghi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack +	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15) +	lmg	%r1,%r3,__LC_RESTART_FN		# load fn, parm & source cpu +	ltgr	%r3,%r3				# test source cpu address +	jm	1f				# negative -> skip source stop +0:	sigp	%r4,%r3,1			# sigp sense to source cpu +	brc	10,0b				# wait for status stored +1:	basr	%r14,%r1			# call function +	stap	__SF_EMPTY(%r15)		# store cpu address +	llgh	%r3,__SF_EMPTY(%r15) +2:	sigp	%r4,%r3,5			# sigp stop to current cpu +	brc	2,2b +3:	j	3b  	.section .kprobes.text, "ax" @@ -808,6 +786,8 @@ cleanup_table:  	.quad	io_tif  	.quad	io_restore  	.quad	io_done +	.quad	psw_idle +	.quad	psw_idle_end  cleanup_critical:  	clg	%r9,BASED(cleanup_table)	# system_call @@ -826,6 +806,10 @@ cleanup_critical:  	jl	cleanup_io_tif  	clg	%r9,BASED(cleanup_table+56)	# io_done  	jl	cleanup_io_restore +	clg	%r9,BASED(cleanup_table+64)	# psw_idle +	jl	0f +	clg	%r9,BASED(cleanup_table+72)	# psw_idle_end +	jl	cleanup_idle  0:	br	%r14 @@ -915,7 +899,6 @@ cleanup_io_restore:  	je	0f 
 	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs  	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9) -	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit  	mvc	0(64,%r11),__PT_R8(%r9)  	lmg	%r0,%r7,__PT_R0(%r9)  0:	lmg	%r8,%r9,__LC_RETURN_PSW @@ -923,6 +906,42 @@ cleanup_io_restore:  cleanup_io_restore_insn:  	.quad	io_done - 4 +cleanup_idle: +	# copy interrupt clock & cpu timer +	mvc	__IDLE_EXIT(8,%r2),__LC_INT_CLOCK +	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER +	cghi	%r11,__LC_SAVE_AREA_ASYNC +	je	0f +	mvc	__IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK +	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER +0:	# check if stck & stpt have been executed +	clg	%r9,BASED(cleanup_idle_insn) +	jhe	1f +	mvc	__IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2) +	mvc	__VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3) +	j	2f +1:	# check if the cpu timer has been reprogrammed +	ltr	%r5,%r5 +	jz	2f +	spt	__VQ_IDLE_ENTER(%r3) +2:	# account system time going idle +	lg	%r9,__LC_STEAL_TIMER +	alg	%r9,__IDLE_ENTER(%r2) +	slg	%r9,__LC_LAST_UPDATE_CLOCK +	stg	%r9,__LC_STEAL_TIMER +	mvc	__LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2) +	lg	%r9,__LC_SYSTEM_TIMER +	alg	%r9,__LC_LAST_UPDATE_TIMER +	slg	%r9,__VQ_IDLE_ENTER(%r3) +	stg	%r9,__LC_SYSTEM_TIMER +	mvc	__LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3) +	# prepare return psw +	nihh	%r8,0xfffd		# clear wait state bit +	lg	%r9,48(%r11)		# return from psw_idle +	br	%r14 +cleanup_idle_insn: +	.quad	psw_idle_lpsw +  /*   * Integer constants   */ @@ -931,6 +950,8 @@ cleanup_io_restore_insn:  	.quad	__critical_start  .Lcritical_length:  	.quad	__critical_end - __critical_start +.Lvtimer_max: +	.quad	0x7fffffffffffffff  #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index affa8e68124a..8342e65a140d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2,7 +2,7 @@   *  arch/s390/kernel/ipl.c   *    ipl/reipl/dump support for Linux on s390.   * - *    Copyright IBM Corp. 2005,2007 + *    Copyright IBM Corp. 
2005,2012   *    Author(s): Michael Holzheu <holzheu@de.ibm.com>   *		 Heiko Carstens <heiko.carstens@de.ibm.com>   *		 Volker Sameske <sameske@de.ibm.com> @@ -17,6 +17,7 @@  #include <linux/fs.h>  #include <linux/gfp.h>  #include <linux/crash_dump.h> +#include <linux/debug_locks.h>  #include <asm/ipl.h>  #include <asm/smp.h>  #include <asm/setup.h> @@ -25,8 +26,9 @@  #include <asm/ebcdic.h>  #include <asm/reset.h>  #include <asm/sclp.h> -#include <asm/sigp.h>  #include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h>  #include "entry.h"  #define IPL_PARM_BLOCK_VERSION 0 @@ -571,7 +573,7 @@ static void __ipl_run(void *unused)  static void ipl_run(struct shutdown_trigger *trigger)  { -	smp_switch_to_ipl_cpu(__ipl_run, NULL); +	smp_call_ipl_cpu(__ipl_run, NULL);  }  static int __init ipl_init(void) @@ -950,6 +952,13 @@ static struct attribute_group reipl_nss_attr_group = {  	.attrs = reipl_nss_attrs,  }; +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ +	reipl_block_actual = reipl_block; +	os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, +			  reipl_block->hdr.len); +} +  /* reipl type */  static int reipl_set_type(enum ipl_type type) @@ -965,7 +974,7 @@ static int reipl_set_type(enum ipl_type type)  			reipl_method = REIPL_METHOD_CCW_VM;  		else  			reipl_method = REIPL_METHOD_CCW_CIO; -		reipl_block_actual = reipl_block_ccw; +		set_reipl_block_actual(reipl_block_ccw);  		break;  	case IPL_TYPE_FCP:  		if (diag308_set_works) @@ -974,7 +983,7 @@ static int reipl_set_type(enum ipl_type type)  			reipl_method = REIPL_METHOD_FCP_RO_VM;  		else  			reipl_method = REIPL_METHOD_FCP_RO_DIAG; -		reipl_block_actual = reipl_block_fcp; +		set_reipl_block_actual(reipl_block_fcp);  		break;  	case IPL_TYPE_FCP_DUMP:  		reipl_method = REIPL_METHOD_FCP_DUMP; @@ -984,7 +993,7 @@ static int reipl_set_type(enum ipl_type type)  			reipl_method = REIPL_METHOD_NSS_DIAG;  		else  			reipl_method = REIPL_METHOD_NSS; -		
reipl_block_actual = reipl_block_nss; +		set_reipl_block_actual(reipl_block_nss);  		break;  	case IPL_TYPE_UNKNOWN:  		reipl_method = REIPL_METHOD_DEFAULT; @@ -1101,7 +1110,7 @@ static void __reipl_run(void *unused)  static void reipl_run(struct shutdown_trigger *trigger)  { -	smp_switch_to_ipl_cpu(__reipl_run, NULL); +	smp_call_ipl_cpu(__reipl_run, NULL);  }  static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) @@ -1256,6 +1265,29 @@ static int __init reipl_fcp_init(void)  	return 0;  } +static int __init reipl_type_init(void) +{ +	enum ipl_type reipl_type = ipl_info.type; +	struct ipl_parameter_block *reipl_block; +	unsigned long size; + +	reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); +	if (!reipl_block) +		goto out; +	/* +	 * If we have an OS info reipl block, this will be used +	 */ +	if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { +		memcpy(reipl_block_fcp, reipl_block, size); +		reipl_type = IPL_TYPE_FCP; +	} else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { +		memcpy(reipl_block_ccw, reipl_block, size); +		reipl_type = IPL_TYPE_CCW; +	} +out: +	return reipl_set_type(reipl_type); +} +  static int __init reipl_init(void)  {  	int rc; @@ -1277,10 +1309,7 @@ static int __init reipl_init(void)  	rc = reipl_nss_init();  	if (rc)  		return rc; -	rc = reipl_set_type(ipl_info.type); -	if (rc) -		return rc; -	return 0; +	return reipl_type_init();  }  static struct shutdown_action __refdata reipl_action = { @@ -1421,7 +1450,7 @@ static void dump_run(struct shutdown_trigger *trigger)  	if (dump_method == DUMP_METHOD_NONE)  		return;  	smp_send_stop(); -	smp_switch_to_ipl_cpu(__dump_run, NULL); +	smp_call_ipl_cpu(__dump_run, NULL);  }  static int __init dump_ccw_init(void) @@ -1499,30 +1528,12 @@ static struct shutdown_action __refdata dump_action = {  static void dump_reipl_run(struct shutdown_trigger *trigger)  { -	preempt_disable(); -	/* -	 * Bypass dynamic address translation (DAT) when storing IPL parameter -	 * information 
block address and checksum into the prefix area -	 * (corresponding to absolute addresses 0-8191). -	 * When enhanced DAT applies and the STE format control in one, -	 * the absolute address is formed without prefixing. In this case a -	 * normal store (stg/st) into the prefix area would no more match to -	 * absolute addresses 0-8191. -	 */ -#ifdef CONFIG_64BIT -	asm volatile("sturg %0,%1" -		:: "a" ((unsigned long) reipl_block_actual), -		"a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#else -	asm volatile("stura %0,%1" -		:: "a" ((unsigned long) reipl_block_actual), -		"a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#endif -	asm volatile("stura %0,%1" -		:: "a" (csum_partial(reipl_block_actual, -				     reipl_block_actual->hdr.len, 0)), -		"a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum)); -	preempt_enable(); +	u32 csum; + +	csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); +	copy_to_absolute_zero(&S390_lowcore.ipib_checksum, &csum, sizeof(csum)); +	copy_to_absolute_zero(&S390_lowcore.ipib, &reipl_block_actual, +			      sizeof(reipl_block_actual));  	dump_run(trigger);  } @@ -1623,9 +1634,7 @@ static void stop_run(struct shutdown_trigger *trigger)  	if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||  	    strcmp(trigger->name, ON_RESTART_STR) == 0)  		disabled_wait((unsigned long) __builtin_return_address(0)); -	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) -		cpu_relax(); -	for (;;); +	smp_stop_cpu();  }  static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, @@ -1713,6 +1722,7 @@ static struct kobj_attribute on_panic_attr =  static void do_panic(void)  { +	lgr_info_log();  	on_panic_trigger.action->fn(&on_panic_trigger);  	stop_run(&on_panic_trigger);  } @@ -1738,9 +1748,8 @@ static ssize_t on_restart_store(struct kobject *kobj,  static struct kobj_attribute on_restart_attr =  	__ATTR(on_restart, 0644, on_restart_show, on_restart_store); -void do_restart(void) +static void __do_restart(void *ignore)  { -	
smp_restart_with_online_cpu();  	smp_send_stop();  #ifdef CONFIG_CRASH_DUMP  	crash_kexec(NULL); @@ -1749,6 +1758,14 @@ void do_restart(void)  	stop_run(&on_restart_trigger);  } +void do_restart(void) +{ +	tracing_off(); +	debug_locks_off(); +	lgr_info_log(); +	smp_call_online_cpu(__do_restart, NULL); +} +  /* on halt */  static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b9a7fdd9c814..2429ecd68872 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -165,13 +165,6 @@ static inline int ext_hash(u16 code)  	return (code + (code >> 9)) & 0xff;  } -static void ext_int_hash_update(struct rcu_head *head) -{ -	struct ext_int_info *p = container_of(head, struct ext_int_info, rcu); - -	kfree(p); -} -  int register_external_interrupt(u16 code, ext_int_handler_t handler)  {  	struct ext_int_info *p; @@ -202,38 +195,34 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)  	list_for_each_entry_rcu(p, &ext_int_hash[index], entry)  		if (p->code == code && p->handler == handler) {  			list_del_rcu(&p->entry); -			call_rcu(&p->rcu, ext_int_hash_update); +			kfree_rcu(p, rcu);  		}  	spin_unlock_irqrestore(&ext_int_hash_lock, flags);  	return 0;  }  EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, +void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,  			   unsigned int param32, unsigned long param64)  {  	struct pt_regs *old_regs; -	unsigned short code;  	struct ext_int_info *p;  	int index; -	code = (unsigned short) ext_int_code;  	old_regs = set_irq_regs(regs); -	s390_idle_check(regs, S390_lowcore.int_clock, -			S390_lowcore.async_enter_timer);  	irq_enter();  	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)  		/* Serve timer interrupts first. 
*/  		clock_comparator_work();  	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; -	if (code != 0x1004) +	if (ext_code.code != 0x1004)  		__get_cpu_var(s390_idle).nohz_delay = 1; -	index = ext_hash(code); +	index = ext_hash(ext_code.code);  	rcu_read_lock();  	list_for_each_entry_rcu(p, &ext_int_hash[index], entry) -		if (likely(p->code == code)) -			p->handler(ext_int_code, param32, param64); +		if (likely(p->code == ext_code.code)) +			p->handler(ext_code, param32, param64);  	rcu_read_unlock();  	irq_exit();  	set_irq_regs(old_regs); diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 000000000000..8431b92ca3ae --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,200 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/system.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { +	/* Bit field with facility information: 4 DWORDs are stored */ +	u64 stfle_fac_list[4]; +	/* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ +	u32 level; +	/* Level 1: CEC info (stsi 1.1.1) */ +	char manufacturer[16]; +	char type[4]; +	char sequence[16]; +	char plant[4]; +	char model[16]; +	/* Level 2: LPAR info (stsi 2.2.2) */ +	u16 lpar_number; +	char name[8]; +	/* Level 3: VM info (stsi 3.2.2) */ +	u8 vm_count; +	struct { +		char name[8]; +		char cpi[16]; +	} vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static void *lgr_page; +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Return number of valid stsi levels + */ 
+static inline int stsi_0(void) +{ +	int rc = stsi(NULL, 0, 0, 0); + +	return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); +} + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ +	memcpy(dst, src, size); +	EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ +	struct sysinfo_1_1_1 *si = lgr_page; + +	if (stsi(si, 1, 1, 1) == -ENOSYS) +		return; +	cpascii(lgr_info->manufacturer, si->manufacturer, +		sizeof(si->manufacturer)); +	cpascii(lgr_info->type, si->type, sizeof(si->type)); +	cpascii(lgr_info->model, si->model, sizeof(si->model)); +	cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); +	cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ +	struct sysinfo_2_2_2 *si = lgr_page; + +	if (stsi(si, 2, 2, 2) == -ENOSYS) +		return; +	cpascii(lgr_info->name, si->name, sizeof(si->name)); +	memcpy(&lgr_info->lpar_number, &si->lpar_number, +	       sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ +	struct sysinfo_3_2_2 *si = lgr_page; +	int i; + +	if (stsi(si, 3, 2, 2) == -ENOSYS) +		return; +	for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { +		cpascii(lgr_info->vm[i].name, si->vm[i].name, +			sizeof(si->vm[i].name)); +		cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, +			sizeof(si->vm[i].cpi)); +	} +	lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ +	memset(lgr_info, 0, sizeof(*lgr_info)); +	stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); +	lgr_info->level = stsi_0(); +	if (lgr_info->level == -ENOSYS) +		return; +	if (lgr_info->level >= 1) +		lgr_stsi_1_1_1(lgr_info); +	if (lgr_info->level >= 2) +		
lgr_stsi_2_2_2(lgr_info); +	if (lgr_info->level >= 3) +		lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ +	static DEFINE_SPINLOCK(lgr_info_lock); +	unsigned long flags; + +	if (!spin_trylock_irqsave(&lgr_info_lock, flags)) +		return; +	lgr_info_get(&lgr_info_cur); +	if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { +		debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); +		lgr_info_last = lgr_info_cur; +	} +	spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ +	lgr_info_log(); +	lgr_timer_set(); +} + +static struct timer_list lgr_timer = +	TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ +	mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ +	lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0); +	if (!lgr_page) +		return -ENOMEM; +	lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); +	if (!lgr_dbf) { +		free_page((unsigned long) lgr_page); +		return -ENOMEM; +	} +	debug_register_view(lgr_dbf, &debug_hex_ascii_view); +	lgr_info_get(&lgr_info_last); +	debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); +	lgr_timer_set(); +	return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 47b168fb29c4..0f8cdf1268d0 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -14,6 +14,7 @@  #include <linux/delay.h>  #include <linux/reboot.h>  #include <linux/ftrace.h> +#include <linux/debug_locks.h>  #include <asm/cio.h>  #include <asm/setup.h>  #include <asm/pgtable.h> @@ -49,50 +50,21 
@@ static void add_elf_notes(int cpu)  }  /* - * Store status of next available physical CPU - */ -static int store_status_next(int start_cpu, int this_cpu) -{ -	struct save_area *sa = (void *) 4608 + store_prefix(); -	int cpu, rc; - -	for (cpu = start_cpu; cpu < 65536; cpu++) { -		if (cpu == this_cpu) -			continue; -		do { -			rc = raw_sigp(cpu, sigp_stop_and_store_status); -		} while (rc == sigp_busy); -		if (rc != sigp_order_code_accepted) -			continue; -		if (sa->pref_reg) -			return cpu; -	} -	return -1; -} - -/*   * Initialize CPU ELF notes   */  void setup_regs(void)  {  	unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; -	int cpu, this_cpu, phys_cpu = 0, first = 1; +	int cpu, this_cpu; -	this_cpu = stap(); - -	if (!S390_lowcore.prefixreg_save_area) -		first = 0; +	this_cpu = smp_find_processor_id(stap()); +	add_elf_notes(this_cpu);  	for_each_online_cpu(cpu) { -		if (first) { -			add_elf_notes(cpu); -			first = 0; +		if (cpu == this_cpu) +			continue; +		if (smp_store_status(cpu))  			continue; -		} -		phys_cpu = store_status_next(phys_cpu, this_cpu); -		if (phys_cpu == -1) -			break;  		add_elf_notes(cpu); -		phys_cpu++;  	}  	/* Copy dump CPU store status info to absolute zero */  	memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); @@ -238,10 +210,14 @@ static void __machine_kexec(void *data)  	struct kimage *image = data;  	pfault_fini(); -	if (image->type == KEXEC_TYPE_CRASH) +	tracing_off(); +	debug_locks_off(); +	if (image->type == KEXEC_TYPE_CRASH) { +		lgr_info_log();  		s390_reset_system(__do_machine_kdump, data); -	else +	} else {  		s390_reset_system(__do_machine_kexec, data); +	}  	disabled_wait((unsigned long) __builtin_return_address(0));  } @@ -255,5 +231,5 @@ void machine_kexec(struct kimage *image)  		return;  	tracer_disable();  	smp_send_stop(); -	smp_switch_to_ipl_cpu(__machine_kexec, image); +	smp_call_ipl_cpu(__machine_kexec, image);  } diff --git a/arch/s390/kernel/nmi.c 
b/arch/s390/kernel/nmi.c index 0fd2e863e114..8c372ca61350 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -254,8 +254,6 @@ void notrace s390_do_machine_check(struct pt_regs *regs)  	int umode;  	nmi_enter(); -	s390_idle_check(regs, S390_lowcore.mcck_clock, -			S390_lowcore.mcck_enter_timer);  	kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;  	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;  	mcck = &__get_cpu_var(cpu_mcck); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 000000000000..bbe522672e06 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,169 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/system.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ +	int size = sizeof(*os_info) - offsetof(struct os_info, version_major); +	return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ +	os_info.crashkernel_addr = (u64)(unsigned long)base; +	os_info.crashkernel_size = (u64)(unsigned long)size; +	os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ +	os_info.entry[nr].addr = (u64)(unsigned long)ptr; +	os_info.entry[nr].size = size; +	os_info.entry[nr].csum = csum_partial(ptr, size, 0); +	os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set 
lowcore pointer + */ +void __init os_info_init(void) +{ +	void *ptr = &os_info; + +	os_info.version_major = OS_INFO_VERSION_MAJOR; +	os_info.version_minor = OS_INFO_VERSION_MINOR; +	os_info.magic = OS_INFO_MAGIC; +	os_info.csum = os_info_csum(&os_info); +	copy_to_absolute_zero(&S390_lowcore.os_info, &ptr, sizeof(ptr)); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ +	unsigned long addr, size = 0; +	char *buf, *buf_align, *msg; +	u32 csum; + +	addr = os_info_old->entry[nr].addr; +	if (!addr) { +		msg = "not available"; +		goto fail; +	} +	size = os_info_old->entry[nr].size; +	buf = kmalloc(size + align - 1, GFP_KERNEL); +	if (!buf) { +		msg = "alloc failed"; +		goto fail; +	} +	buf_align = PTR_ALIGN(buf, align); +	if (copy_from_oldmem(buf_align, (void *) addr, size)) { +		msg = "copy failed"; +		goto fail_free; +	} +	csum = csum_partial(buf_align, size, 0); +	if (csum != os_info_old->entry[nr].csum) { +		msg = "checksum failed"; +		goto fail_free; +	} +	os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; +	msg = "copied"; +	goto out; +fail_free: +	kfree(buf); +fail: +	os_info_old->entry[nr].addr = 0; +out: +	pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", +		nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ +	static int os_info_init; +	unsigned long addr; + +	if (os_info_init) +		return; +	if (!OLDMEM_BASE) +		goto fail; +	if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) +		goto fail; +	if (addr == 0 || addr % PAGE_SIZE) +		goto fail; +	os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); +	if (!os_info_old) +		goto fail; +	if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) +		goto fail_free; +	if (os_info_old->magic != OS_INFO_MAGIC) +		goto fail_free; +	if (os_info_old->csum != 
os_info_csum(os_info_old)) +		goto fail_free; +	if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) +		goto fail_free; +	os_info_old_alloc(OS_INFO_VMCOREINFO, 1); +	os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); +	os_info_old_alloc(OS_INFO_INIT_FN, PAGE_SIZE); +	pr_info("crashkernel: addr=0x%lx size=%lu\n", +		(unsigned long) os_info_old->crashkernel_addr, +		(unsigned long) os_info_old->crashkernel_size); +	os_info_init = 1; +	return; +fail_free: +	kfree(os_info_old); +fail: +	os_info_init = 1; +	os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ +	os_info_old_init(); + +	if (!os_info_old) +		return NULL; +	if (!os_info_old->entry[nr].addr) +		return NULL; +	*size = (unsigned long) os_info_old->entry[nr].size; +	return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 4261aa799774..3732e4c09cbe 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -29,7 +29,6 @@  #include <asm/irq.h>  #include <asm/timer.h>  #include <asm/nmi.h> -#include <asm/compat.h>  #include <asm/smp.h>  #include "entry.h" @@ -78,13 +77,8 @@ static void default_idle(void)  		local_irq_enable();  		return;  	} -	trace_hardirqs_on(); -	/* Don't trace preempt off for idle. */ -	stop_critical_timings(); -	/* Stop virtual timer and halt the cpu. */ +	/* Halt the cpu and keep track of cpu time accounting. */  	vtime_stop_cpu(); -	/* Reenable preemption tracer. 
*/ -	start_critical_timings();  }  void cpu_idle(void) @@ -98,9 +92,7 @@ void cpu_idle(void)  		tick_nohz_idle_exit();  		if (test_thread_flag(TIF_MCCK_PENDING))  			s390_handle_mcck(); -		preempt_enable_no_resched(); -		schedule(); -		preempt_disable(); +		schedule_preempt_disabled();  	}  } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9d82ed4bcb27..61f95489d70c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -20,8 +20,8 @@  #include <linux/regset.h>  #include <linux/tracehook.h>  #include <linux/seccomp.h> +#include <linux/compat.h>  #include <trace/syscall.h> -#include <asm/compat.h>  #include <asm/segment.h>  #include <asm/page.h>  #include <asm/pgtable.h> diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 354de0763eff..38e751278bf7 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -2,7 +2,7 @@   *  arch/s390/kernel/setup.c   *   *  S390 version - *    Copyright (C) IBM Corp. 1999,2010 + *    Copyright (C) IBM Corp. 
1999,2012   *    Author(s): Hartmut Penner (hp@de.ibm.com),   *               Martin Schwidefsky (schwidefsky@de.ibm.com)   * @@ -46,6 +46,7 @@  #include <linux/kexec.h>  #include <linux/crash_dump.h>  #include <linux/memory.h> +#include <linux/compat.h>  #include <asm/ipl.h>  #include <asm/uaccess.h> @@ -59,9 +60,10 @@  #include <asm/ptrace.h>  #include <asm/sections.h>  #include <asm/ebcdic.h> -#include <asm/compat.h>  #include <asm/kvm_virtio.h>  #include <asm/diag.h> +#include <asm/os_info.h> +#include "entry.h"  long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |  			  PSW_MASK_EA | PSW_MASK_BA; @@ -351,8 +353,9 @@ static void setup_addressing_mode(void)  	}  } -static void __init -setup_lowcore(void) +void *restart_stack __attribute__((__section__(".data"))); + +static void __init setup_lowcore(void)  {  	struct _lowcore *lc; @@ -363,7 +366,7 @@ setup_lowcore(void)  	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);  	lc->restart_psw.mask = psw_kernel_bits;  	lc->restart_psw.addr = -		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; +		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;  	lc->external_new_psw.mask = psw_kernel_bits |  		PSW_MASK_DAT | PSW_MASK_MCHECK;  	lc->external_new_psw.addr = @@ -412,6 +415,24 @@ setup_lowcore(void)  	lc->last_update_timer = S390_lowcore.last_update_timer;  	lc->last_update_clock = S390_lowcore.last_update_clock;  	lc->ftrace_func = S390_lowcore.ftrace_func; + +	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); +	restart_stack += ASYNC_SIZE; + +	/* +	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant +	 * restart data to the absolute zero lowcore. This is necesary if +	 * PSW restart is done on an offline CPU that has lowcore zero. 
+	 */ +	lc->restart_stack = (unsigned long) restart_stack; +	lc->restart_fn = (unsigned long) do_restart; +	lc->restart_data = 0; +	lc->restart_source = -1UL; +	memcpy(&S390_lowcore.restart_stack, &lc->restart_stack, +	       4*sizeof(unsigned long)); +	copy_to_absolute_zero(&S390_lowcore.restart_psw, +			      &lc->restart_psw, sizeof(psw_t)); +  	set_prefix((u32)(unsigned long) lc);  	lowcore_ptr[0] = lc;  } @@ -572,27 +593,6 @@ static void __init setup_memory_end(void)  	}  } -void *restart_stack __attribute__((__section__(".data"))); - -/* - * Setup new PSW and allocate stack for PSW restart interrupt - */ -static void __init setup_restart_psw(void) -{ -	psw_t psw; - -	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); -	restart_stack += ASYNC_SIZE; - -	/* -	 * Setup restart PSW for absolute zero lowcore. This is necesary -	 * if PSW restart is done on an offline CPU that has lowcore zero -	 */ -	psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; -	psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; -	copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); -} -  static void __init setup_vmcoreinfo(void)  {  #ifdef CONFIG_KEXEC @@ -747,7 +747,7 @@ static void __init reserve_crashkernel(void)  {  #ifdef CONFIG_CRASH_DUMP  	unsigned long long crash_base, crash_size; -	char *msg; +	char *msg = NULL;  	int rc;  	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, @@ -779,11 +779,11 @@ static void __init reserve_crashkernel(void)  	pr_info("Reserving %lluMB of memory at %lluMB "  		"for crashkernel (System RAM: %luMB)\n",  		crash_size >> 20, crash_base >> 20, memory_end >> 20); +	os_info_crashkernel_add(crash_base, crash_size);  #endif  } -static void __init -setup_memory(void) +static void __init setup_memory(void)  {          unsigned long bootmap_size;  	unsigned long start_pfn, end_pfn; @@ -1014,8 +1014,7 @@ static void __init setup_hwcaps(void)   * was printed.   
*/ -void __init -setup_arch(char **cmdline_p) +void __init setup_arch(char **cmdline_p)  {          /*           * print what head.S has found out about the machine @@ -1060,6 +1059,7 @@ setup_arch(char **cmdline_p)  	parse_early_param(); +	os_info_init();  	setup_ipl();  	setup_memory_end();  	setup_addressing_mode(); @@ -1068,7 +1068,6 @@ setup_arch(char **cmdline_p)  	setup_memory();  	setup_resources();  	setup_vmcoreinfo(); -	setup_restart_psw();  	setup_lowcore();          cpu_init(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index a8ba840294ff..f29f5ef400e5 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,7 +30,6 @@  #include <asm/ucontext.h>  #include <asm/uaccess.h>  #include <asm/lowcore.h> -#include <asm/compat.h>  #include "entry.h"  #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -385,7 +384,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,  			 siginfo_t *info, sigset_t *oldset,  			 struct pt_regs *regs)  { -	sigset_t blocked;  	int ret;  	/* Set up the stack frame */ @@ -395,10 +393,7 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,  		ret = setup_frame(sig, ka, oldset, regs);  	if (ret)  		return ret; -	sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); -	if (!(ka->sa.sa_flags & SA_NODEFER)) -		sigaddset(&blocked, sig); -	set_current_blocked(&blocked); +	block_sigmask(ka, sig);  	return 0;  } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2398ce6b15ae..a8bf9994b086 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,23 +1,18 @@  /* - *  arch/s390/kernel/smp.c + *  SMP related functions   * - *    Copyright IBM Corp. 1999, 2009 - *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - *		 Martin Schwidefsky (schwidefsky@de.ibm.com) - *		 Heiko Carstens (heiko.carstens@de.ibm.com) + *    Copyright IBM Corp. 
1999,2012 + *    Author(s): Denis Joseph Barrow, + *		 Martin Schwidefsky <schwidefsky@de.ibm.com>, + *		 Heiko Carstens <heiko.carstens@de.ibm.com>,   *   *  based on other smp stuff by   *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>   *    (c) 1998 Ingo Molnar   * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c.   
*/  #define KMSG_COMPONENT "cpu" @@ -31,198 +26,433 @@  #include <linux/spinlock.h>  #include <linux/kernel_stat.h>  #include <linux/delay.h> -#include <linux/cache.h>  #include <linux/interrupt.h>  #include <linux/irqflags.h>  #include <linux/cpu.h> -#include <linux/timex.h> -#include <linux/bootmem.h>  #include <linux/slab.h>  #include <linux/crash_dump.h>  #include <asm/asm-offsets.h>  #include <asm/ipl.h>  #include <asm/setup.h> -#include <asm/sigp.h> -#include <asm/pgalloc.h>  #include <asm/irq.h> -#include <asm/cpcmd.h>  #include <asm/tlbflush.h>  #include <asm/timer.h>  #include <asm/lowcore.h>  #include <asm/sclp.h> -#include <asm/cputime.h>  #include <asm/vdso.h> -#include <asm/cpu.h> +#include <asm/debug.h> +#include <asm/os_info.h>  #include "entry.h" -/* logical cpu to cpu address */ -unsigned short __cpu_logical_map[NR_CPUS]; +enum { +	sigp_sense = 1, +	sigp_external_call = 2, +	sigp_emergency_signal = 3, +	sigp_start = 4, +	sigp_stop = 5, +	sigp_restart = 6, +	sigp_stop_and_store_status = 9, +	sigp_initial_cpu_reset = 11, +	sigp_cpu_reset = 12, +	sigp_set_prefix = 13, +	sigp_store_status_at_address = 14, +	sigp_store_extended_status_at_address = 15, +	sigp_set_architecture = 18, +	sigp_conditional_emergency_signal = 19, +	sigp_sense_running = 21, +}; -static struct task_struct *current_set[NR_CPUS]; +enum { +	sigp_order_code_accepted = 0, +	sigp_status_stored = 1, +	sigp_busy = 2, +	sigp_not_operational = 3, +}; -static u8 smp_cpu_type; -static int smp_use_sigp_detection; +enum { +	ec_schedule = 0, +	ec_call_function, +	ec_call_function_single, +	ec_stop_cpu, +}; -enum s390_cpu_state { +enum {  	CPU_STATE_STANDBY,  	CPU_STATE_CONFIGURED,  }; +struct pcpu { +	struct cpu cpu; +	struct task_struct *idle;	/* idle process for the cpu */ +	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */ +	unsigned long async_stack;	/* async stack for the cpu */ +	unsigned long panic_stack;	/* panic stack for the cpu */ +	unsigned long ec_mask;		/* bit mask for 
ec_xxx functions */ +	int state;			/* physical cpu state */ +	u32 status;			/* last status received via sigp */ +	u16 address;			/* physical cpu address */ +}; + +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; +  DEFINE_MUTEX(smp_cpu_state_mutex); -static int smp_cpu_state[NR_CPUS]; -static DEFINE_PER_CPU(struct cpu, cpu_devices); +/* + * Signal processor helper functions. + */ +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ +	register unsigned int reg1 asm ("1") = parm; +	int cc; -static void smp_ext_bitcall(int, int); +	asm volatile( +		"	sigp	%1,%2,0(%3)\n" +		"	ipm	%0\n" +		"	srl	%0,28\n" +		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); +	if (status && cc == 1) +		*status = reg1; +	return cc; +} -static int raw_cpu_stopped(int cpu) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)  { -	u32 status; +	int cc; -	switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) { -	case sigp_status_stored: -		/* Check for stopped and check stop state */ -		if (status & 0x50) -			return 1; -		break; -	default: -		break; +	while (1) { +		cc = __pcpu_sigp(addr, order, parm, status); +		if (cc != sigp_busy) +			return cc; +		cpu_relax();  	} -	return 0;  } -static inline int cpu_stopped(int cpu) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)  { -	return raw_cpu_stopped(cpu_logical_map(cpu)); +	int cc, retry; + +	for (retry = 0; ; retry++) { +		cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status); +		if (cc != sigp_busy) +			break; +		if (retry >= 3) +			udelay(10); +	} +	return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ +	if (__pcpu_sigp(pcpu->address, sigp_sense, +			0, &pcpu->status) != sigp_status_stored) +		return 0; +	/* Check for stopped and check stop state */ +	return !!(pcpu->status & 0x50); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ +	if (__pcpu_sigp(pcpu->address, sigp_sense_running, +			0, 
&pcpu->status) != sigp_status_stored) +		return 1; +	/* Check for running status */ +	return !(pcpu->status & 0x400);  }  /* - * Ensure that PSW restart is done on an online CPU + * Find struct pcpu by cpu address.   */ -void smp_restart_with_online_cpu(void) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)  {  	int cpu; -	for_each_online_cpu(cpu) { -		if (stap() == __cpu_logical_map[cpu]) { -			/* We are online: Enable DAT again and return */ -			__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); -			return; -		} +	for_each_cpu(cpu, mask) +		if (pcpu_devices[cpu].address == address) +			return pcpu_devices + cpu; +	return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ +	int order; + +	set_bit(ec_bit, &pcpu->ec_mask); +	order = pcpu_running(pcpu) ? +		sigp_external_call : sigp_emergency_signal; +	pcpu_sigp_retry(pcpu, order, 0); +} + +static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ +	struct _lowcore *lc; + +	if (pcpu != &pcpu_devices[0]) { +		pcpu->lowcore =	(struct _lowcore *) +			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); +		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); +		pcpu->panic_stack = __get_free_page(GFP_KERNEL); +		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) +			goto out;  	} -	/* We are not online: Do PSW restart on an online CPU */ -	while (sigp(cpu, sigp_restart) == sigp_busy) -		cpu_relax(); -	/* And stop ourself */ -	while (raw_sigp(stap(), sigp_stop) == sigp_busy) -		cpu_relax(); -	for (;;); +	lc = pcpu->lowcore; +	memcpy(lc, &S390_lowcore, 512); +	memset((char *) lc + 512, 0, sizeof(*lc) - 512); +	lc->async_stack = pcpu->async_stack + ASYNC_SIZE; +	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE; +	lc->cpu_nr = cpu; +#ifndef CONFIG_64BIT +	if (MACHINE_HAS_IEEE) { +		lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); +		if (!lc->extended_save_area_addr) +			goto out; +	} +#else +	if (vdso_alloc_per_cpu(lc)) +		goto out; 
+#endif +	lowcore_ptr[cpu] = lc; +	pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc); +	return 0; +out: +	if (pcpu != &pcpu_devices[0]) { +		free_page(pcpu->panic_stack); +		free_pages(pcpu->async_stack, ASYNC_ORDER); +		free_pages((unsigned long) pcpu->lowcore, LC_ORDER); +	} +	return -ENOMEM;  } -void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static void pcpu_free_lowcore(struct pcpu *pcpu)  { -	struct _lowcore *lc, *current_lc; -	struct stack_frame *sf; -	struct pt_regs *regs; -	unsigned long sp; - -	if (smp_processor_id() == 0) -		func(data); -	__load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | -			PSW_MASK_EA | PSW_MASK_BA); -	/* Disable lowcore protection */ -	__ctl_clear_bit(0, 28); -	current_lc = lowcore_ptr[smp_processor_id()]; -	lc = lowcore_ptr[0]; -	if (!lc) -		lc = current_lc; -	lc->restart_psw.mask = -		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; -	lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; -	if (!cpu_online(0)) -		smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); -	while (sigp(0, sigp_stop_and_store_status) == sigp_busy) -		cpu_relax(); -	sp = lc->panic_stack; -	sp -= sizeof(struct pt_regs); -	regs = (struct pt_regs *) sp; -	memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); -	regs->psw = current_lc->psw_save_area; -	sp -= STACK_FRAME_OVERHEAD; -	sf = (struct stack_frame *) sp; -	sf->back_chain = 0; -	smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); +	pcpu_sigp_retry(pcpu, sigp_set_prefix, 0); +	lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT +	if (MACHINE_HAS_IEEE) { +		struct _lowcore *lc = pcpu->lowcore; + +		free_page((unsigned long) lc->extended_save_area_addr); +		lc->extended_save_area_addr = 0; +	} +#else +	vdso_free_per_cpu(pcpu->lowcore); +#endif +	if (pcpu != &pcpu_devices[0]) { +		free_page(pcpu->panic_stack); +		free_pages(pcpu->async_stack, ASYNC_ORDER); +		free_pages((unsigned long) pcpu->lowcore, 
LC_ORDER); +	} +} + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ +	struct _lowcore *lc = pcpu->lowcore; + +	atomic_inc(&init_mm.context.attach_count); +	lc->cpu_nr = cpu; +	lc->percpu_offset = __per_cpu_offset[cpu]; +	lc->kernel_asce = S390_lowcore.kernel_asce; +	lc->machine_flags = S390_lowcore.machine_flags; +	lc->ftrace_func = S390_lowcore.ftrace_func; +	lc->user_timer = lc->system_timer = lc->steal_timer = 0; +	__ctl_store(lc->cregs_save_area, 0, 15); +	save_access_regs((unsigned int *) lc->access_regs_save_area); +	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, +	       MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ +	struct _lowcore *lc = pcpu->lowcore; +	struct thread_info *ti = task_thread_info(tsk); + +	lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE; +	lc->thread_info = (unsigned long) task_thread_info(tsk); +	lc->current_task = (unsigned long) tsk; +	lc->user_timer = ti->user_timer; +	lc->system_timer = ti->system_timer; +	lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ +	struct _lowcore *lc = pcpu->lowcore; + +	lc->restart_stack = lc->kernel_stack; +	lc->restart_fn = (unsigned long) func; +	lc->restart_data = (unsigned long) data; +	lc->restart_source = -1UL; +	pcpu_sigp_retry(pcpu, sigp_restart, 0); +} + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), +			  void *data, unsigned long stack) +{ +	struct _lowcore *lc = pcpu->lowcore; +	unsigned short this_cpu; + +	__load_psw_mask(psw_kernel_bits); +	this_cpu = stap(); +	if (pcpu->address == this_cpu) +		func(data);	/* should not return */ +	/* Stop target cpu (if func returns this stops the current cpu). */ +	pcpu_sigp_retry(pcpu, sigp_stop, 0); +	/* Restart func on the target cpu and stop the current cpu. 
*/ +	lc->restart_stack = stack; +	lc->restart_fn = (unsigned long) func; +	lc->restart_data = (unsigned long) data; +	lc->restart_source = (unsigned long) this_cpu; +	asm volatile( +		"0:	sigp	0,%0,6	# sigp restart to target cpu\n" +		"	brc	2,0b	# busy, try again\n" +		"1:	sigp	0,%1,5	# sigp stop to current cpu\n" +		"	brc	2,1b	# busy, try again\n" +		: : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc"); +	for (;;) ; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ +	struct pcpu *pcpu; + +	/* Use the current cpu if it is online. */ +	pcpu = pcpu_find_address(cpu_online_mask, stap()); +	if (!pcpu) +		/* Use the first online cpu. */ +		pcpu = pcpu_devices + cpumask_first(cpu_online_mask); +	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);  } -static void smp_stop_cpu(void) +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data)  { -	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) +	pcpu_delegate(&pcpu_devices[0], func, data, +		      pcpu_devices->panic_stack + PAGE_SIZE); +} + +int smp_find_processor_id(u16 address) +{ +	int cpu; + +	for_each_present_cpu(cpu) +		if (pcpu_devices[cpu].address == address) +			return cpu; +	return -1; +} + +int smp_vcpu_scheduled(int cpu) +{ +	return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ +	if (MACHINE_HAS_DIAG44) +		asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ +	if (MACHINE_HAS_DIAG9C) +		asm volatile("diag %0,0,0x9c" +			     : : "d" (pcpu_devices[cpu].address)); +	else if (MACHINE_HAS_DIAG44) +		asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. 
+ */ +void smp_emergency_stop(cpumask_t *cpumask) +{ +	u64 end; +	int cpu; + +	end = get_clock() + (1000000UL << 12); +	for_each_cpu(cpu, cpumask) { +		struct pcpu *pcpu = pcpu_devices + cpu; +		set_bit(ec_stop_cpu, &pcpu->ec_mask); +		while (__pcpu_sigp(pcpu->address, sigp_emergency_signal, +				   0, NULL) == sigp_busy && +		       get_clock() < end) +			cpu_relax(); +	} +	while (get_clock() < end) { +		for_each_cpu(cpu, cpumask) +			if (pcpu_stopped(pcpu_devices + cpu)) +				cpumask_clear_cpu(cpu, cpumask); +		if (cpumask_empty(cpumask)) +			break;  		cpu_relax(); +	}  } +/* + * Stop all cpus but the current one. + */  void smp_send_stop(void)  {  	cpumask_t cpumask;  	int cpu; -	u64 end;  	/* Disable all interrupts/machine checks */  	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);  	trace_hardirqs_off(); +	debug_set_critical();  	cpumask_copy(&cpumask, cpu_online_mask);  	cpumask_clear_cpu(smp_processor_id(), &cpumask); -	if (oops_in_progress) { -		/* -		 * Give the other cpus the opportunity to complete -		 * outstanding interrupts before stopping them. -		 */ -		end = get_clock() + (1000000UL << 12); -		for_each_cpu(cpu, &cpumask) { -			set_bit(ec_stop_cpu, (unsigned long *) -				&lowcore_ptr[cpu]->ext_call_fast); -			while (sigp(cpu, sigp_emergency_signal) == sigp_busy && -			       get_clock() < end) -				cpu_relax(); -		} -		while (get_clock() < end) { -			for_each_cpu(cpu, &cpumask) -				if (cpu_stopped(cpu)) -					cpumask_clear_cpu(cpu, &cpumask); -			if (cpumask_empty(&cpumask)) -				break; -			cpu_relax(); -		} -	} +	if (oops_in_progress) +		smp_emergency_stop(&cpumask);  	/* stop all processors */  	for_each_cpu(cpu, &cpumask) { -		while (sigp(cpu, sigp_stop) == sigp_busy) -			cpu_relax(); -		while (!cpu_stopped(cpu)) +		struct pcpu *pcpu = pcpu_devices + cpu; +		pcpu_sigp_retry(pcpu, sigp_stop, 0); +		while (!pcpu_stopped(pcpu))  			cpu_relax();  	}  }  /* + * Stop the current cpu. 
+ */ +void smp_stop_cpu(void) +{ +	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); +	for (;;) ; +} + +/*   * This is the main routine where commands issued by other   * cpus are handled.   */ - -static void do_ext_call_interrupt(unsigned int ext_int_code, +static void do_ext_call_interrupt(struct ext_code ext_code,  				  unsigned int param32, unsigned long param64)  {  	unsigned long bits; +	int cpu; -	if ((ext_int_code & 0xffff) == 0x1202) -		kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; +	cpu = smp_processor_id(); +	if (ext_code.code == 0x1202) +		kstat_cpu(cpu).irqs[EXTINT_EXC]++;  	else -		kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; +		kstat_cpu(cpu).irqs[EXTINT_EMS]++;  	/*  	 * handle bit signal external calls  	 */ -	bits = xchg(&S390_lowcore.ext_call_fast, 0); +	bits = xchg(&pcpu_devices[cpu].ec_mask, 0);  	if (test_bit(ec_stop_cpu, &bits))  		smp_stop_cpu(); @@ -238,38 +468,17 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,  } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, int sig) -{ -	int order; - -	/* -	 * Set signaling bit in lowcore of target cpu and kick it -	 */ -	set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); -	while (1) { -		order = smp_vcpu_scheduled(cpu) ? 
-			sigp_external_call : sigp_emergency_signal; -		if (sigp(cpu, order) != sigp_busy) -			break; -		udelay(10); -	} -} -  void arch_send_call_function_ipi_mask(const struct cpumask *mask)  {  	int cpu;  	for_each_cpu(cpu, mask) -		smp_ext_bitcall(cpu, ec_call_function); +		pcpu_ec_call(pcpu_devices + cpu, ec_call_function);  }  void arch_send_call_function_single_ipi(int cpu)  { -	smp_ext_bitcall(cpu, ec_call_function_single); +	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);  }  #ifndef CONFIG_64BIT @@ -295,15 +504,16 @@ EXPORT_SYMBOL(smp_ptlb_all);   */  void smp_send_reschedule(int cpu)  { -	smp_ext_bitcall(cpu, ec_schedule); +	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);  }  /*   * parameter area for the set/clear control bit callbacks   */  struct ec_creg_mask_parms { -	unsigned long orvals[16]; -	unsigned long andvals[16]; +	unsigned long orval; +	unsigned long andval; +	int cr;  };  /* @@ -313,11 +523,9 @@ static void smp_ctl_bit_callback(void *info)  {  	struct ec_creg_mask_parms *pp = info;  	unsigned long cregs[16]; -	int i;  	__ctl_store(cregs, 0, 15); -	for (i = 0; i <= 15; i++) -		cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; +	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;  	__ctl_load(cregs, 0, 15);  } @@ -326,11 +534,8 @@ static void smp_ctl_bit_callback(void *info)   */  void smp_ctl_set_bit(int cr, int bit)  { -	struct ec_creg_mask_parms parms; +	struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; -	memset(&parms.orvals, 0, sizeof(parms.orvals)); -	memset(&parms.andvals, 0xff, sizeof(parms.andvals)); -	parms.orvals[cr] = 1UL << bit;  	on_each_cpu(smp_ctl_bit_callback, &parms, 1);  }  EXPORT_SYMBOL(smp_ctl_set_bit); @@ -340,226 +545,178 @@ EXPORT_SYMBOL(smp_ctl_set_bit);   */  void smp_ctl_clear_bit(int cr, int bit)  { -	struct ec_creg_mask_parms parms; +	struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; -	memset(&parms.orvals, 0, sizeof(parms.orvals)); -	memset(&parms.andvals, 0xff, 
sizeof(parms.andvals)); -	parms.andvals[cr] = ~(1UL << bit);  	on_each_cpu(smp_ctl_bit_callback, &parms, 1);  }  EXPORT_SYMBOL(smp_ctl_clear_bit);  #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) -static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) +struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; +EXPORT_SYMBOL_GPL(zfcpdump_save_areas); + +static void __init smp_get_save_area(int cpu, u16 address)  { -	if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) -		return; +	void *lc = pcpu_devices[0].lowcore; +	struct save_area *save_area; +  	if (is_kdump_kernel())  		return; +	if (!OLDMEM_BASE && (address == boot_cpu_address || +			     ipl_info.type != IPL_TYPE_FCP_DUMP)) +		return;  	if (cpu >= NR_CPUS) { -		pr_warning("CPU %i exceeds the maximum %i and is excluded from " -			   "the dump\n", cpu, NR_CPUS - 1); +		pr_warning("CPU %i exceeds the maximum %i and is excluded " +			   "from the dump\n", cpu, NR_CPUS - 1);  		return;  	} -	zfcpdump_save_areas[cpu] = kmalloc(sizeof(struct save_area), GFP_KERNEL); -	while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy) -		cpu_relax(); -	memcpy_real(zfcpdump_save_areas[cpu], -		    (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE, -		    sizeof(struct save_area)); +	save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL); +	if (!save_area) +		panic("could not allocate memory for save area\n"); +	zfcpdump_save_areas[cpu] = save_area; +#ifdef CONFIG_CRASH_DUMP +	if (address == boot_cpu_address) { +		/* Copy the registers of the boot cpu. */ +		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), +				 SAVE_AREA_BASE - PAGE_SIZE, 0); +		return; +	} +#endif +	/* Get the registers of a non-boot cpu. 
*/ +	__pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL); +	memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));  } -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); - -#else - -static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } - -#endif /* CONFIG_ZFCPDUMP */ - -static int cpu_known(int cpu_id) +int smp_store_status(int cpu)  { -	int cpu; +	struct pcpu *pcpu; -	for_each_present_cpu(cpu) { -		if (__cpu_logical_map[cpu] == cpu_id) -			return 1; -	} +	pcpu = pcpu_devices + cpu; +	if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status, +			      0, NULL) != sigp_order_code_accepted) +		return -EIO;  	return 0;  } -static int smp_rescan_cpus_sigp(cpumask_t avail) -{ -	int cpu_id, logical_cpu; +#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ -	logical_cpu = cpumask_first(&avail); -	if (logical_cpu >= nr_cpu_ids) -		return 0; -	for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { -		if (cpu_known(cpu_id)) -			continue; -		__cpu_logical_map[logical_cpu] = cpu_id; -		cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); -		if (!cpu_stopped(logical_cpu)) -			continue; -		set_cpu_present(logical_cpu, true); -		smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; -		logical_cpu = cpumask_next(logical_cpu, &avail); -		if (logical_cpu >= nr_cpu_ids) -			break; -	} -	return 0; -} +static inline void smp_get_save_area(int cpu, u16 address) { } + +#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ -static int smp_rescan_cpus_sclp(cpumask_t avail) +static struct sclp_cpu_info *smp_get_cpu_info(void)  { +	static int use_sigp_detection;  	struct sclp_cpu_info *info; -	int cpu_id, logical_cpu, cpu; -	int rc; - -	logical_cpu = cpumask_first(&avail); -	if (logical_cpu >= nr_cpu_ids) -		return 0; -	info = kmalloc(sizeof(*info), GFP_KERNEL); -	if (!info) -		return -ENOMEM; -	rc = sclp_get_cpu_info(info); -	if (rc) -		goto out; -	for (cpu = 0; cpu < info->combined; cpu++) { -		if 
(info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) -			continue; -		cpu_id = info->cpu[cpu].address; -		if (cpu_known(cpu_id)) -			continue; -		__cpu_logical_map[logical_cpu] = cpu_id; -		cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); -		set_cpu_present(logical_cpu, true); -		if (cpu >= info->configured) -			smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; -		else -			smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; -		logical_cpu = cpumask_next(logical_cpu, &avail); -		if (logical_cpu >= nr_cpu_ids) -			break; +	int address; + +	info = kzalloc(sizeof(*info), GFP_KERNEL); +	if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { +		use_sigp_detection = 1; +		for (address = 0; address <= MAX_CPU_ADDRESS; address++) { +			if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) == +			    sigp_not_operational) +				continue; +			info->cpu[info->configured].address = address; +			info->configured++; +		} +		info->combined = info->configured;  	} -out: -	kfree(info); -	return rc; +	return info;  } -static int __smp_rescan_cpus(void) +static int __devinit smp_add_present_cpu(int cpu); + +static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info, +				       int sysfs_add)  { +	struct pcpu *pcpu;  	cpumask_t avail; +	int cpu, nr, i; +	nr = 0;  	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); -	if (smp_use_sigp_detection) -		return smp_rescan_cpus_sigp(avail); -	else -		return smp_rescan_cpus_sclp(avail); +	cpu = cpumask_first(&avail); +	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { +		if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) +			continue; +		if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) +			continue; +		pcpu = pcpu_devices + cpu; +		pcpu->address = info->cpu[i].address; +		pcpu->state = (cpu >= info->configured) ? 
+			CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; +		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); +		set_cpu_present(cpu, true); +		if (sysfs_add && smp_add_present_cpu(cpu) != 0) +			set_cpu_present(cpu, false); +		else +			nr++; +		cpu = cpumask_next(cpu, &avail); +	} +	return nr;  }  static void __init smp_detect_cpus(void)  {  	unsigned int cpu, c_cpus, s_cpus;  	struct sclp_cpu_info *info; -	u16 boot_cpu_addr, cpu_addr; -	c_cpus = 1; -	s_cpus = 0; -	boot_cpu_addr = __cpu_logical_map[0]; -	info = kmalloc(sizeof(*info), GFP_KERNEL); +	info = smp_get_cpu_info();  	if (!info)  		panic("smp_detect_cpus failed to allocate memory\n"); -#ifdef CONFIG_CRASH_DUMP -	if (OLDMEM_BASE && !is_kdump_kernel()) { -		struct save_area *save_area; - -		save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); -		if (!save_area) -			panic("could not allocate memory for save area\n"); -		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), -				 0x200, 0); -		zfcpdump_save_areas[0] = save_area; -	} -#endif -	/* Use sigp detection algorithm if sclp doesn't work. */ -	if (sclp_get_cpu_info(info)) { -		smp_use_sigp_detection = 1; -		for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { -			if (cpu == boot_cpu_addr) -				continue; -			if (!raw_cpu_stopped(cpu)) -				continue; -			smp_get_save_area(c_cpus, cpu); -			c_cpus++; -		} -		goto out; -	} -  	if (info->has_cpu_type) {  		for (cpu = 0; cpu < info->combined; cpu++) { -			if (info->cpu[cpu].address == boot_cpu_addr) { -				smp_cpu_type = info->cpu[cpu].type; -				break; -			} +			if (info->cpu[cpu].address != boot_cpu_address) +				continue; +			/* The boot cpu dictates the cpu type. 
*/ +			boot_cpu_type = info->cpu[cpu].type; +			break;  		}  	} - +	c_cpus = s_cpus = 0;  	for (cpu = 0; cpu < info->combined; cpu++) { -		if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) -			continue; -		cpu_addr = info->cpu[cpu].address; -		if (cpu_addr == boot_cpu_addr) +		if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)  			continue; -		if (!raw_cpu_stopped(cpu_addr)) { +		if (cpu < info->configured) { +			smp_get_save_area(c_cpus, info->cpu[cpu].address); +			c_cpus++; +		} else  			s_cpus++; -			continue; -		} -		smp_get_save_area(c_cpus, cpu_addr); -		c_cpus++;  	} -out: -	kfree(info);  	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);  	get_online_cpus(); -	__smp_rescan_cpus(); +	__smp_rescan_cpus(info, 0);  	put_online_cpus(); +	kfree(info);  }  /*   *	Activate a secondary processor.   */ -int __cpuinit start_secondary(void *cpuvoid) +static void __cpuinit smp_start_secondary(void *cpuvoid)  { +	S390_lowcore.last_update_clock = get_clock(); +	S390_lowcore.restart_stack = (unsigned long) restart_stack; +	S390_lowcore.restart_fn = (unsigned long) do_restart; +	S390_lowcore.restart_data = 0; +	S390_lowcore.restart_source = -1UL; +	restore_access_regs(S390_lowcore.access_regs_save_area); +	__ctl_load(S390_lowcore.cregs_save_area, 0, 15); +	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);  	cpu_init();  	preempt_disable();  	init_cpu_timer();  	init_cpu_vtimer();  	pfault_init(); -  	notify_cpu_starting(smp_processor_id());  	ipi_call_lock();  	set_cpu_online(smp_processor_id(), true);  	ipi_call_unlock(); -	__ctl_clear_bit(0, 28); /* Disable lowcore protection */ -	S390_lowcore.restart_psw.mask = -		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; -	S390_lowcore.restart_psw.addr = -		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; -	__ctl_set_bit(0, 28); /* Enable lowcore protection */ -	/* -	 * Wait until the cpu which brought this one up marked it -	 * active before enabling interrupts. 
-	 */ -	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) -		cpu_relax();  	local_irq_enable();  	/* cpu_idle will call schedule for us */  	cpu_idle(); -	return 0;  }  struct create_idle { @@ -578,82 +735,20 @@ static void __cpuinit smp_fork_idle(struct work_struct *work)  	complete(&c_idle->done);  } -static int __cpuinit smp_alloc_lowcore(int cpu) -{ -	unsigned long async_stack, panic_stack; -	struct _lowcore *lowcore; - -	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); -	if (!lowcore) -		return -ENOMEM; -	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); -	panic_stack = __get_free_page(GFP_KERNEL); -	if (!panic_stack || !async_stack) -		goto out; -	memcpy(lowcore, &S390_lowcore, 512); -	memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); -	lowcore->async_stack = async_stack + ASYNC_SIZE; -	lowcore->panic_stack = panic_stack + PAGE_SIZE; -	lowcore->restart_psw.mask = -		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; -	lowcore->restart_psw.addr = -		PSW_ADDR_AMODE | (unsigned long) restart_int_handler; -	if (user_mode != HOME_SPACE_MODE) -		lowcore->restart_psw.mask |= PSW_ASC_HOME; -#ifndef CONFIG_64BIT -	if (MACHINE_HAS_IEEE) { -		unsigned long save_area; - -		save_area = get_zeroed_page(GFP_KERNEL); -		if (!save_area) -			goto out; -		lowcore->extended_save_area_addr = (u32) save_area; -	} -#else -	if (vdso_alloc_per_cpu(cpu, lowcore)) -		goto out; -#endif -	lowcore_ptr[cpu] = lowcore; -	return 0; - -out: -	free_page(panic_stack); -	free_pages(async_stack, ASYNC_ORDER); -	free_pages((unsigned long) lowcore, LC_ORDER); -	return -ENOMEM; -} - -static void smp_free_lowcore(int cpu) -{ -	struct _lowcore *lowcore; - -	lowcore = lowcore_ptr[cpu]; -#ifndef CONFIG_64BIT -	if (MACHINE_HAS_IEEE) -		free_page((unsigned long) lowcore->extended_save_area_addr); -#else -	vdso_free_per_cpu(cpu, lowcore); -#endif -	free_page(lowcore->panic_stack - PAGE_SIZE); -	free_pages(lowcore->async_stack - ASYNC_SIZE, 
ASYNC_ORDER); -	free_pages((unsigned long) lowcore, LC_ORDER); -	lowcore_ptr[cpu] = NULL; -} -  /* Upping and downing of CPUs */  int __cpuinit __cpu_up(unsigned int cpu)  { -	struct _lowcore *cpu_lowcore;  	struct create_idle c_idle; -	struct task_struct *idle; -	struct stack_frame *sf; -	u32 lowcore; -	int ccode; +	struct pcpu *pcpu; +	int rc; -	if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) +	pcpu = pcpu_devices + cpu; +	if (pcpu->state != CPU_STATE_CONFIGURED) +		return -EIO; +	if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) != +	    sigp_order_code_accepted)  		return -EIO; -	idle = current_set[cpu]; -	if (!idle) { +	if (!pcpu->idle) {  		c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done);  		INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle);  		c_idle.cpu = cpu; @@ -661,68 +756,28 @@ int __cpuinit __cpu_up(unsigned int cpu)  		wait_for_completion(&c_idle.done);  		if (IS_ERR(c_idle.idle))  			return PTR_ERR(c_idle.idle); -		idle = c_idle.idle; -		current_set[cpu] = c_idle.idle; +		pcpu->idle = c_idle.idle;  	} -	init_idle(idle, cpu); -	if (smp_alloc_lowcore(cpu)) -		return -ENOMEM; -	do { -		ccode = sigp(cpu, sigp_initial_cpu_reset); -		if (ccode == sigp_busy) -			udelay(10); -		if (ccode == sigp_not_operational) -			goto err_out; -	} while (ccode == sigp_busy); - -	lowcore = (u32)(unsigned long)lowcore_ptr[cpu]; -	while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) -		udelay(10); - -	cpu_lowcore = lowcore_ptr[cpu]; -	cpu_lowcore->kernel_stack = (unsigned long) -		task_stack_page(idle) + THREAD_SIZE; -	cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle); -	sf = (struct stack_frame *) (cpu_lowcore->kernel_stack -				     - sizeof(struct pt_regs) -				     - sizeof(struct stack_frame)); -	memset(sf, 0, sizeof(struct stack_frame)); -	sf->gprs[9] = (unsigned long) sf; -	cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf; -	__ctl_store(cpu_lowcore->cregs_save_area, 0, 15); -	atomic_inc(&init_mm.context.attach_count); -	asm 
volatile( -		"	stam	0,15,0(%0)" -		: : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); -	cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; -	cpu_lowcore->current_task = (unsigned long) idle; -	cpu_lowcore->cpu_nr = cpu; -	cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; -	cpu_lowcore->machine_flags = S390_lowcore.machine_flags; -	cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; -	memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, -	       MAX_FACILITY_BIT/8); -	eieio(); - -	while (sigp(cpu, sigp_restart) == sigp_busy) -		udelay(10); - +	init_idle(pcpu->idle, cpu); +	rc = pcpu_alloc_lowcore(pcpu, cpu); +	if (rc) +		return rc; +	pcpu_prepare_secondary(pcpu, cpu); +	pcpu_attach_task(pcpu, pcpu->idle); +	pcpu_start_fn(pcpu, smp_start_secondary, NULL);  	while (!cpu_online(cpu))  		cpu_relax();  	return 0; - -err_out: -	smp_free_lowcore(cpu); -	return -EIO;  }  static int __init setup_possible_cpus(char *s)  { -	int pcpus, cpu; +	int max, cpu; -	pcpus = simple_strtoul(s, NULL, 0); +	if (kstrtoint(s, 0, &max) < 0) +		return 0;  	init_cpu_possible(cpumask_of(0)); -	for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++) +	for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)  		set_cpu_possible(cpu, true);  	return 0;  } @@ -732,113 +787,79 @@ early_param("possible_cpus", setup_possible_cpus);  int __cpu_disable(void)  { -	struct ec_creg_mask_parms cr_parms; -	int cpu = smp_processor_id(); - -	set_cpu_online(cpu, false); +	unsigned long cregs[16]; -	/* Disable pfault pseudo page faults on this cpu. */ +	set_cpu_online(smp_processor_id(), false); +	/* Disable pseudo page faults on this cpu. 
*/  	pfault_fini(); - -	memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); -	memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); - -	/* disable all external interrupts */ -	cr_parms.orvals[0] = 0; -	cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | -				1 << 10 | 1 <<	9 | 1 <<  6 | 1 <<  5 | -				1 <<  4); -	/* disable all I/O interrupts */ -	cr_parms.orvals[6] = 0; -	cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | -				1 << 27 | 1 << 26 | 1 << 25 | 1 << 24); -	/* disable most machine checks */ -	cr_parms.orvals[14] = 0; -	cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 | -				 1 << 25 | 1 << 24); - -	smp_ctl_bit_callback(&cr_parms); - +	/* Disable interrupt sources via control register. */ +	__ctl_store(cregs, 0, 15); +	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */ +	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */ +	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */ +	__ctl_load(cregs, 0, 15);  	return 0;  }  void __cpu_die(unsigned int cpu)  { +	struct pcpu *pcpu; +  	/* Wait until target cpu is down */ -	while (!cpu_stopped(cpu)) +	pcpu = pcpu_devices + cpu; +	while (!pcpu_stopped(pcpu))  		cpu_relax(); -	while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy) -		udelay(10); -	smp_free_lowcore(cpu); +	pcpu_free_lowcore(pcpu);  	atomic_dec(&init_mm.context.attach_count);  }  void __noreturn cpu_die(void)  {  	idle_task_exit(); -	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) -		cpu_relax(); -	for (;;); +	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0); +	for (;;) ;  }  #endif /* CONFIG_HOTPLUG_CPU */ -void __init smp_prepare_cpus(unsigned int max_cpus) +static void smp_call_os_info_init_fn(void)  { -#ifndef CONFIG_64BIT -	unsigned long save_area = 0; -#endif -	unsigned long async_stack, panic_stack; -	struct _lowcore *lowcore; +	int (*init_fn)(void); +	unsigned long size; -	smp_detect_cpus(); +	init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size); +	if 
(!init_fn) +		return; +	init_fn(); +} +void __init smp_prepare_cpus(unsigned int max_cpus) +{  	/* request the 0x1201 emergency signal external interrupt */  	if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)  		panic("Couldn't request external interrupt 0x1201");  	/* request the 0x1202 external call external interrupt */  	if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)  		panic("Couldn't request external interrupt 0x1202"); - -	/* Reallocate current lowcore, but keep its contents. */ -	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); -	panic_stack = __get_free_page(GFP_KERNEL); -	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); -	BUG_ON(!lowcore || !panic_stack || !async_stack); -#ifndef CONFIG_64BIT -	if (MACHINE_HAS_IEEE) -		save_area = get_zeroed_page(GFP_KERNEL); -#endif -	local_irq_disable(); -	local_mcck_disable(); -	lowcore_ptr[smp_processor_id()] = lowcore; -	*lowcore = S390_lowcore; -	lowcore->panic_stack = panic_stack + PAGE_SIZE; -	lowcore->async_stack = async_stack + ASYNC_SIZE; -#ifndef CONFIG_64BIT -	if (MACHINE_HAS_IEEE) -		lowcore->extended_save_area_addr = (u32) save_area; -#endif -	set_prefix((u32)(unsigned long) lowcore); -	local_mcck_enable(); -	local_irq_enable(); -#ifdef CONFIG_64BIT -	if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) -		BUG(); -#endif +	smp_call_os_info_init_fn(); +	smp_detect_cpus();  }  void __init smp_prepare_boot_cpu(void)  { -	BUG_ON(smp_processor_id() != 0); - -	current_thread_info()->cpu = 0; -	set_cpu_present(0, true); -	set_cpu_online(0, true); +	struct pcpu *pcpu = pcpu_devices; + +	boot_cpu_address = stap(); +	pcpu->idle = current; +	pcpu->state = CPU_STATE_CONFIGURED; +	pcpu->address = boot_cpu_address; +	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); +	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE; +	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;  	S390_lowcore.percpu_offset = 
__per_cpu_offset[0]; -	current_set[0] = current; -	smp_cpu_state[0] = CPU_STATE_CONFIGURED;  	cpu_set_polarization(0, POLARIZATION_UNKNOWN); +	set_cpu_present(0, true); +	set_cpu_online(0, true);  }  void __init smp_cpus_done(unsigned int max_cpus) @@ -848,7 +869,6 @@ void __init smp_cpus_done(unsigned int max_cpus)  void __init smp_setup_processor_id(void)  {  	S390_lowcore.cpu_nr = 0; -	__cpu_logical_map[0] = stap();  }  /* @@ -864,56 +884,57 @@ int setup_profiling_timer(unsigned int multiplier)  #ifdef CONFIG_HOTPLUG_CPU  static ssize_t cpu_configure_show(struct device *dev, -				struct device_attribute *attr, char *buf) +				  struct device_attribute *attr, char *buf)  {  	ssize_t count;  	mutex_lock(&smp_cpu_state_mutex); -	count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]); +	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);  	mutex_unlock(&smp_cpu_state_mutex);  	return count;  }  static ssize_t cpu_configure_store(struct device *dev, -				  struct device_attribute *attr, -				  const char *buf, size_t count) +				   struct device_attribute *attr, +				   const char *buf, size_t count)  { -	int cpu = dev->id; -	int val, rc; +	struct pcpu *pcpu; +	int cpu, val, rc;  	char delim;  	if (sscanf(buf, "%d %c", &val, &delim) != 1)  		return -EINVAL;  	if (val != 0 && val != 1)  		return -EINVAL; -  	get_online_cpus();  	mutex_lock(&smp_cpu_state_mutex);  	rc = -EBUSY;  	/* disallow configuration changes of online cpus and cpu 0 */ +	cpu = dev->id;  	if (cpu_online(cpu) || cpu == 0)  		goto out; +	pcpu = pcpu_devices + cpu;  	rc = 0;  	switch (val) {  	case 0: -		if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) { -			rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); -			if (!rc) { -				smp_cpu_state[cpu] = CPU_STATE_STANDBY; -				cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); -				topology_expect_change(); -			} -		} +		if (pcpu->state != CPU_STATE_CONFIGURED) +			break; +		rc = sclp_cpu_deconfigure(pcpu->address); +		if (rc) +			break; +		pcpu->state = 
CPU_STATE_STANDBY; +		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); +		topology_expect_change();  		break;  	case 1: -		if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) { -			rc = sclp_cpu_configure(__cpu_logical_map[cpu]); -			if (!rc) { -				smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; -				cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); -				topology_expect_change(); -			} -		} +		if (pcpu->state != CPU_STATE_STANDBY) +			break; +		rc = sclp_cpu_configure(pcpu->address); +		if (rc) +			break; +		pcpu->state = CPU_STATE_CONFIGURED; +		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); +		topology_expect_change();  		break;  	default:  		break; @@ -929,7 +950,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);  static ssize_t show_cpu_address(struct device *dev,  				struct device_attribute *attr, char *buf)  { -	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); +	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);  }  static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -961,22 +982,16 @@ static DEVICE_ATTR(capability, 0444, show_capability, NULL);  static ssize_t show_idle_count(struct device *dev,  				struct device_attribute *attr, char *buf)  { -	struct s390_idle_data *idle; +	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);  	unsigned long long idle_count;  	unsigned int sequence; -	idle = &per_cpu(s390_idle, dev->id); -repeat: -	sequence = idle->sequence; -	smp_rmb(); -	if (sequence & 1) -		goto repeat; -	idle_count = idle->idle_count; -	if (idle->idle_enter) -		idle_count++; -	smp_rmb(); -	if (idle->sequence != sequence) -		goto repeat; +	do { +		sequence = ACCESS_ONCE(idle->sequence); +		idle_count = ACCESS_ONCE(idle->idle_count); +		if (ACCESS_ONCE(idle->idle_enter)) +			idle_count++; +	} while ((sequence & 1) || (idle->sequence != sequence));  	return sprintf(buf, "%llu\n", idle_count);  }  static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -984,24 +999,18 @@ static 
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);  static ssize_t show_idle_time(struct device *dev,  				struct device_attribute *attr, char *buf)  { -	struct s390_idle_data *idle; -	unsigned long long now, idle_time, idle_enter; +	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); +	unsigned long long now, idle_time, idle_enter, idle_exit;  	unsigned int sequence; -	idle = &per_cpu(s390_idle, dev->id); -	now = get_clock(); -repeat: -	sequence = idle->sequence; -	smp_rmb(); -	if (sequence & 1) -		goto repeat; -	idle_time = idle->idle_time; -	idle_enter = idle->idle_enter; -	if (idle_enter != 0ULL && idle_enter < now) -		idle_time += now - idle_enter; -	smp_rmb(); -	if (idle->sequence != sequence) -		goto repeat; +	do { +		now = get_clock(); +		sequence = ACCESS_ONCE(idle->sequence); +		idle_time = ACCESS_ONCE(idle->idle_time); +		idle_enter = ACCESS_ONCE(idle->idle_enter); +		idle_exit = ACCESS_ONCE(idle->idle_exit); +	} while ((sequence & 1) || (idle->sequence != sequence)); +	idle_time += idle_enter ? ((idle_exit ? 
: now) - idle_enter) : 0;  	return sprintf(buf, "%llu\n", idle_time >> 12);  }  static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -1021,7 +1030,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,  				    unsigned long action, void *hcpu)  {  	unsigned int cpu = (unsigned int)(long)hcpu; -	struct cpu *c = &per_cpu(cpu_devices, cpu); +	struct cpu *c = &pcpu_devices[cpu].cpu;  	struct device *s = &c->dev;  	struct s390_idle_data *idle;  	int err = 0; @@ -1047,7 +1056,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = {  static int __devinit smp_add_present_cpu(int cpu)  { -	struct cpu *c = &per_cpu(cpu_devices, cpu); +	struct cpu *c = &pcpu_devices[cpu].cpu;  	struct device *s = &c->dev;  	int rc; @@ -1085,29 +1094,21 @@ out:  int __ref smp_rescan_cpus(void)  { -	cpumask_t newcpus; -	int cpu; -	int rc; +	struct sclp_cpu_info *info; +	int nr; +	info = smp_get_cpu_info(); +	if (!info) +		return -ENOMEM;  	get_online_cpus();  	mutex_lock(&smp_cpu_state_mutex); -	cpumask_copy(&newcpus, cpu_present_mask); -	rc = __smp_rescan_cpus(); -	if (rc) -		goto out; -	cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); -	for_each_cpu(cpu, &newcpus) { -		rc = smp_add_present_cpu(cpu); -		if (rc) -			set_cpu_present(cpu, false); -	} -	rc = 0; -out: +	nr = __smp_rescan_cpus(info, 1);  	mutex_unlock(&smp_cpu_state_mutex);  	put_online_cpus(); -	if (!cpumask_empty(&newcpus)) +	kfree(info); +	if (nr)  		topology_schedule_update(); -	return rc; +	return 0;  }  static ssize_t __ref rescan_store(struct device *dev, diff --git a/arch/s390/kernel/switch_cpu.S b/arch/s390/kernel/switch_cpu.S deleted file mode 100644 index bfe070bc7659..000000000000 --- a/arch/s390/kernel/switch_cpu.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * 31-bit switch cpu code - * - * Copyright IBM Corp. 
2009 - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -#	     %r3 - function parameter -#	     %r4 - stack poiner -#	     %r5 - current cpu -#	     %r6 - destination cpu - -	.section .text -ENTRY(smp_switch_to_cpu) -	stm	%r6,%r15,__SF_GPRS(%r15) -	lr	%r1,%r15 -	ahi	%r15,-STACK_FRAME_OVERHEAD -	st	%r1,__SF_BACKCHAIN(%r15) -	basr	%r13,0 -0:	la	%r1,.gprregs_addr-0b(%r13) -	l	%r1,0(%r1) -	stm	%r0,%r15,0(%r1) -1:	sigp	%r0,%r6,__SIGP_RESTART	/* start destination CPU */ -	brc	2,1b			/* busy, try again */ -2:	sigp	%r0,%r5,__SIGP_STOP	/* stop current CPU */ -	brc	2,2b			/* busy, try again */ -3:	j	3b - -ENTRY(smp_restart_cpu) -	basr	%r13,0 -0:	la	%r1,.gprregs_addr-0b(%r13) -	l	%r1,0(%r1) -	lm	%r0,%r15,0(%r1) -1:	sigp	%r0,%r5,__SIGP_SENSE	/* Wait for calling CPU */ -	brc	10,1b			/* busy, accepted (status 0), running */ -	tmll	%r0,0x40		/* Test if calling CPU is stopped */ -	jz	1b -	ltr	%r4,%r4			/* New stack ? */ -	jz	1f -	lr	%r15,%r4 -1:	lr	%r14,%r2		/* r14: Function to call */ -	lr	%r2,%r3			/* r2 : Parameter for function*/ -	basr	%r14,%r14		/* Call function */ - -.gprregs_addr: -	.long	.gprregs - -	.section .data,"aw",@progbits -.gprregs: -	.rept	16 -	.long	0 -	.endr diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S deleted file mode 100644 index fcc42d799e41..000000000000 --- a/arch/s390/kernel/switch_cpu64.S +++ /dev/null @@ -1,51 +0,0 @@ -/* - * 64-bit switch cpu code - * - * Copyright IBM Corp. 
2009 - * - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -#	     %r3 - function parameter -#	     %r4 - stack poiner -#	     %r5 - current cpu -#	     %r6 - destination cpu - -	.section .text -ENTRY(smp_switch_to_cpu) -	stmg	%r6,%r15,__SF_GPRS(%r15) -	lgr	%r1,%r15 -	aghi	%r15,-STACK_FRAME_OVERHEAD -	stg	%r1,__SF_BACKCHAIN(%r15) -	larl	%r1,.gprregs -	stmg	%r0,%r15,0(%r1) -1:	sigp	%r0,%r6,__SIGP_RESTART	/* start destination CPU */ -	brc	2,1b			/* busy, try again */ -2:	sigp	%r0,%r5,__SIGP_STOP	/* stop current CPU */ -	brc	2,2b			/* busy, try again */ -3:	j	3b - -ENTRY(smp_restart_cpu) -	larl	%r1,.gprregs -	lmg	%r0,%r15,0(%r1) -1:	sigp	%r0,%r5,__SIGP_SENSE	/* Wait for calling CPU */ -	brc	10,1b			/* busy, accepted (status 0), running */ -	tmll	%r0,0x40		/* Test if calling CPU is stopped */ -	jz	1b -	ltgr	%r4,%r4			/* New stack ? */ -	jz	1f -	lgr	%r15,%r4 -1:	lgr	%r14,%r2		/* r14: Function to call */ -	lgr	%r2,%r3			/* r2 : Parameter for function*/ -	basr	%r14,%r14		/* Call function */ - -	.section .data,"aw",@progbits -.gprregs: -	.rept	16 -	.quad	0 -	.endr diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index acb78cdee896..dd70ef046058 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -42,7 +42,7 @@ ENTRY(swsusp_arch_suspend)  	lghi	%r1,0x1000  	/* Save CPU address */ -	stap	__LC_CPU_ADDRESS(%r0) +	stap	__LC_EXT_CPU_ADDR(%r0)  	/* Store registers */  	mvc	0x318(4,%r1),__SF_EMPTY(%r15)	/* move prefix to lowcore */ @@ -173,15 +173,15 @@ pgm_check_entry:  	larl	%r1,.Lresume_cpu		/* Resume CPU address: r2 */  	stap	0(%r1)  	llgh	%r2,0(%r1) -	llgh	%r1,__LC_CPU_ADDRESS(%r0)	/* Suspend CPU address: r1 */ +	llgh	%r1,__LC_EXT_CPU_ADDR(%r0)	/* Suspend CPU address: r1 */  	cgr	%r1,%r2  	je	restore_registers		/* r1 = r2 -> nothing to do */  	larl	
%r4,.Lrestart_suspend_psw	/* Set new restart PSW */  	mvc	__LC_RST_NEW_PSW(16,%r0),0(%r4)  3: -	sigp	%r9,%r1,__SIGP_INITIAL_CPU_RESET -	brc	8,4f	/* accepted */ -	brc	2,3b	/* busy, try again */ +	sigp	%r9,%r1,11			/* sigp initial cpu reset */ +	brc	8,4f				/* accepted */ +	brc	2,3b				/* busy, try again */  	/* Suspend CPU not available -> panic */  	larl	%r15,init_thread_union @@ -196,10 +196,10 @@ pgm_check_entry:  	lpsw	0(%r3)  4:  	/* Switch to suspend CPU */ -	sigp	%r9,%r1,__SIGP_RESTART	/* start suspend CPU */ +	sigp	%r9,%r1,6		/* sigp restart to suspend CPU */  	brc	2,4b			/* busy, try again */  5: -	sigp	%r9,%r2,__SIGP_STOP	/* stop resume (current) CPU */ +	sigp	%r9,%r2,5		/* sigp stop to current resume CPU */  	brc	2,5b			/* busy, try again */  6:	j	6b @@ -207,7 +207,7 @@ restart_suspend:  	larl	%r1,.Lresume_cpu  	llgh	%r2,0(%r1)  7: -	sigp	%r9,%r2,__SIGP_SENSE	/* Wait for resume CPU */ +	sigp	%r9,%r2,1		/* sigp sense, wait for resume CPU */  	brc	8,7b			/* accepted, status 0, still running */  	brc	2,7b			/* busy, try again */  	tmll	%r9,0x40		/* Test if resume CPU is stopped */ @@ -257,6 +257,9 @@ restore_registers:  	lghi	%r2,0  	brasl	%r14,arch_set_page_states +	/* Log potential guest relocation */ +	brasl	%r14,lgr_info_log +  	/* Reinitialize the channel subsystem */  	brasl	%r14,channel_subsystem_reinit diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 14da278febbf..d4e1cb1dbcd1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -165,7 +165,7 @@ void init_cpu_timer(void)  	__ctl_set_bit(0, 4);  } -static void clock_comparator_interrupt(unsigned int ext_int_code, +static void clock_comparator_interrupt(struct ext_code ext_code,  				       unsigned int param32,  				       unsigned long param64)  { @@ -177,7 +177,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code,  static void etr_timing_alert(struct etr_irq_parm *);  static void stp_timing_alert(struct stp_irq_parm *); -static void 
timing_alert_interrupt(unsigned int ext_int_code, +static void timing_alert_interrupt(struct ext_code ext_code,  				   unsigned int param32, unsigned long param64)  {  	kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 7370a41948ca..4f8dc942257c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -79,12 +79,12 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,  	     cpu < TOPOLOGY_CPU_BITS;  	     cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1))  	{ -		unsigned int rcpu, lcpu; +		unsigned int rcpu; +		int lcpu;  		rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; -		for_each_present_cpu(lcpu) { -			if (cpu_logical_map(lcpu) != rcpu) -				continue; +		lcpu = smp_find_processor_id(rcpu); +		if (lcpu >= 0) {  			cpumask_set_cpu(lcpu, &book->mask);  			cpu_book_id[lcpu] = book->id;  			cpumask_set_cpu(lcpu, &core->mask); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5ce3750b181f..cd6ebe12c481 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -41,6 +41,7 @@  #include <asm/cpcmd.h>  #include <asm/lowcore.h>  #include <asm/debug.h> +#include <asm/ipl.h>  #include "entry.h"  void (*pgm_check_table[128])(struct pt_regs *regs); @@ -144,8 +145,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)  	for (i = 0; i < kstack_depth_to_print; i++) {  		if (((addr_t) stack & (THREAD_SIZE-1)) == 0)  			break; -		if (i && ((i * sizeof (long) % 32) == 0)) -			printk("\n       "); +		if ((i * sizeof(long) % 32) == 0) +			printk("%s       ", i == 0 ? 
"" : "\n");  		printk(LONG, *stack++);  	}  	printk("\n"); @@ -239,6 +240,7 @@ void die(struct pt_regs *regs, const char *str)  	static int die_counter;  	oops_enter(); +	lgr_info_log();  	debug_stop_all();  	console_verbose();  	spin_lock_irq(&die_lock); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d73630b4fe1d..9c80138206b0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -89,18 +89,11 @@ static void vdso_init_data(struct vdso_data *vd)  #ifdef CONFIG_64BIT  /* - * Setup per cpu vdso data page. - */ -static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) -{ -} - -/*   * Allocate/free per cpu vdso data.   */  #define SEGMENT_ORDER	2 -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct _lowcore *lowcore)  {  	unsigned long segment_table, page_table, page_frame;  	u32 *psal, *aste; @@ -139,7 +132,6 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)  	aste[4] = (u32)(addr_t) psal;  	lowcore->vdso_per_cpu_data = page_frame; -	vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);  	return 0;  out: @@ -149,7 +141,7 @@ out:  	return -ENOMEM;  } -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +void vdso_free_per_cpu(struct _lowcore *lowcore)  {  	unsigned long segment_table, page_table, page_frame;  	u32 *psal, *aste; @@ -168,19 +160,15 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)  	free_pages(segment_table, SEGMENT_ORDER);  } -static void __vdso_init_cr5(void *dummy) +static void vdso_init_cr5(void)  {  	unsigned long cr5; +	if (user_mode == HOME_SPACE_MODE || !vdso_enabled) +		return;  	cr5 = offsetof(struct _lowcore, paste);  	__ctl_load(cr5, 5, 5);  } - -static void vdso_init_cr5(void) -{ -	if (user_mode != HOME_SPACE_MODE && vdso_enabled) -		on_each_cpu(__vdso_init_cr5, NULL, 1); -}  #endif /* CONFIG_64BIT */  /* @@ -253,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)  	 * 
on the "data" page of the vDSO or you'll stop getting kernel  	 * updates and your nice userland gettimeofday will be totally dead.  	 * It's fine to use that for setting breakpoints in the vDSO code -	 * pages though -	 * -	 * Make sure the vDSO gets into every core dump. -	 * Dumping its contents makes post-mortem fully interpretable later -	 * without matching up the same kernel and hardware config to see -	 * what PC values meant. +	 * pages though.  	 */  	rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,  				     VM_READ|VM_EXEC| -				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| -				     VM_ALWAYSDUMP, +				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,  				     vdso_pagelist);  	if (rc)  		current->mm->context.vdso_base = 0; @@ -322,10 +304,8 @@ static int __init vdso_init(void)  	}  	vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);  	vdso64_pagelist[vdso64_pages] = NULL; -#ifndef CONFIG_SMP -	if (vdso_alloc_per_cpu(0, &S390_lowcore)) +	if (vdso_alloc_per_cpu(&S390_lowcore))  		BUG(); -#endif  	vdso_init_cr5();  #endif /* CONFIG_64BIT */ @@ -335,7 +315,7 @@ static int __init vdso_init(void)  	return 0;  } -arch_initcall(vdso_init); +early_initcall(vdso_init);  int in_gate_area_no_mm(unsigned long addr)  { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index bb48977f5469..39ebff506946 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -26,6 +26,7 @@  #include <asm/irq_regs.h>  #include <asm/cputime.h>  #include <asm/irq.h> +#include "entry.h"  static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); @@ -123,153 +124,53 @@ void account_system_vtime(struct task_struct *tsk)  }  EXPORT_SYMBOL_GPL(account_system_vtime); -void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer) +void __kprobes vtime_stop_cpu(void)  {  	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);  	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); -	__u64 idle_time, expires; +	unsigned long long 
idle_time; +	unsigned long psw_mask; -	if (idle->idle_enter == 0ULL) -		return; +	trace_hardirqs_on(); +	/* Don't trace preempt off for idle. */ +	stop_critical_timings(); -	/* Account time spent with enabled wait psw loaded as idle time. */ -	idle_time = int_clock - idle->idle_enter; -	account_idle_time(idle_time); -	S390_lowcore.steal_timer += -		idle->idle_enter - S390_lowcore.last_update_clock; -	S390_lowcore.last_update_clock = int_clock; - -	/* Account system time spent going idle. */ -	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; -	S390_lowcore.last_update_timer = enter_timer; - -	/* Restart vtime CPU timer */ -	if (vq->do_spt) { -		/* Program old expire value but first save progress. */ -		expires = vq->idle - enter_timer; -		expires += get_vtimer(); -		set_vtimer(expires); -	} else { -		/* Don't account the CPU timer delta while the cpu was idle. */ -		vq->elapsed -= vq->idle - enter_timer; -	} +	/* Wait for external, I/O or machine check interrupt. */ +	psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT | +		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; +	idle->nohz_delay = 0; +	/* Call the assembler magic in entry.S */ +	psw_idle(idle, vq, psw_mask, !list_empty(&vq->list)); + +	/* Reenable preemption tracer. */ +	start_critical_timings(); + +	/* Account time spent with enabled wait psw loaded as idle time. */  	idle->sequence++;  	smp_wmb(); +	idle_time = idle->idle_exit - idle->idle_enter;  	idle->idle_time += idle_time; -	idle->idle_enter = 0ULL; +	idle->idle_enter = idle->idle_exit = 0ULL;  	idle->idle_count++; +	account_idle_time(idle_time);  	smp_wmb();  	idle->sequence++;  } -void __kprobes vtime_stop_cpu(void) -{ -	struct s390_idle_data *idle = &__get_cpu_var(s390_idle); -	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); -	psw_t psw; - -	/* Wait for external, I/O or machine check interrupt. 
*/ -	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | -		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - -	idle->nohz_delay = 0; - -	/* Check if the CPU timer needs to be reprogrammed. */ -	if (vq->do_spt) { -		__u64 vmax = VTIMER_MAX_SLICE; -		/* -		 * The inline assembly is equivalent to -		 *	vq->idle = get_cpu_timer(); -		 *	set_cpu_timer(VTIMER_MAX_SLICE); -		 *	idle->idle_enter = get_clock(); -		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | -		 *			   PSW_MASK_DAT | PSW_MASK_IO | -		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK); -		 * The difference is that the inline assembly makes sure that -		 * the last three instruction are stpt, stck and lpsw in that -		 * order. This is done to increase the precision. -		 */ -		asm volatile( -#ifndef CONFIG_64BIT -			"	basr	1,0\n" -			"0:	ahi	1,1f-0b\n" -			"	st	1,4(%2)\n" -#else /* CONFIG_64BIT */ -			"	larl	1,1f\n" -			"	stg	1,8(%2)\n" -#endif /* CONFIG_64BIT */ -			"	stpt	0(%4)\n" -			"	spt	0(%5)\n" -			"	stck	0(%3)\n" -#ifndef CONFIG_64BIT -			"	lpsw	0(%2)\n" -#else /* CONFIG_64BIT */ -			"	lpswe	0(%2)\n" -#endif /* CONFIG_64BIT */ -			"1:" -			: "=m" (idle->idle_enter), "=m" (vq->idle) -			: "a" (&psw), "a" (&idle->idle_enter), -			  "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) -			: "memory", "cc", "1"); -	} else { -		/* -		 * The inline assembly is equivalent to -		 *	vq->idle = get_cpu_timer(); -		 *	idle->idle_enter = get_clock(); -		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | -		 *			   PSW_MASK_DAT | PSW_MASK_IO | -		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK); -		 * The difference is that the inline assembly makes sure that -		 * the last three instruction are stpt, stck and lpsw in that -		 * order. This is done to increase the precision. 
-		 */ -		asm volatile( -#ifndef CONFIG_64BIT -			"	basr	1,0\n" -			"0:	ahi	1,1f-0b\n" -			"	st	1,4(%2)\n" -#else /* CONFIG_64BIT */ -			"	larl	1,1f\n" -			"	stg	1,8(%2)\n" -#endif /* CONFIG_64BIT */ -			"	stpt	0(%4)\n" -			"	stck	0(%3)\n" -#ifndef CONFIG_64BIT -			"	lpsw	0(%2)\n" -#else /* CONFIG_64BIT */ -			"	lpswe	0(%2)\n" -#endif /* CONFIG_64BIT */ -			"1:" -			: "=m" (idle->idle_enter), "=m" (vq->idle) -			: "a" (&psw), "a" (&idle->idle_enter), -			  "a" (&vq->idle), "m" (psw) -			: "memory", "cc", "1"); -	} -} -  cputime64_t s390_get_idle_time(int cpu)  { -	struct s390_idle_data *idle; -	unsigned long long now, idle_time, idle_enter; +	struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); +	unsigned long long now, idle_enter, idle_exit;  	unsigned int sequence; -	idle = &per_cpu(s390_idle, cpu); - -	now = get_clock(); -repeat: -	sequence = idle->sequence; -	smp_rmb(); -	if (sequence & 1) -		goto repeat; -	idle_time = 0; -	idle_enter = idle->idle_enter; -	if (idle_enter != 0ULL && idle_enter < now) -		idle_time = now - idle_enter; -	smp_rmb(); -	if (idle->sequence != sequence) -		goto repeat; -	return idle_time; +	do { +		now = get_clock(); +		sequence = ACCESS_ONCE(idle->sequence); +		idle_enter = ACCESS_ONCE(idle->idle_enter); +		idle_exit = ACCESS_ONCE(idle->idle_exit); +	} while ((sequence & 1) || (idle->sequence != sequence)); +	return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;  }  /* @@ -319,7 +220,7 @@ static void do_callbacks(struct list_head *cb_list)  /*   * Handler for the virtual CPU timer.   
*/ -static void do_cpu_timer_interrupt(unsigned int ext_int_code, +static void do_cpu_timer_interrupt(struct ext_code ext_code,  				   unsigned int param32, unsigned long param64)  {  	struct vtimer_queue *vq; @@ -346,7 +247,6 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,  	}  	spin_unlock(&vq->lock); -	vq->do_spt = list_empty(&cb_list);  	do_callbacks(&cb_list);  	/* next event is first in list */ @@ -355,8 +255,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,  	if (!list_empty(&vq->list)) {  		event = list_first_entry(&vq->list, struct vtimer_list, entry);  		next = event->expires; -	} else -		vq->do_spt = 0; +	}  	spin_unlock(&vq->lock);  	/*  	 * To improve precision add the time spent by the @@ -570,6 +469,9 @@ void init_cpu_vtimer(void)  	/* enable cpu timer interrupts */  	__ctl_set_bit(0,10); + +	/* set initial cpu timer */ +	set_vtimer(0x7fffffffffffffffULL);  }  static int __cpuinit s390_nohz_notify(struct notifier_block *self, | 
