From 87d8b9eb7eb6669aad6435a51e9862362141ba76 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:14 -0700
Subject: clocksource: Extract max nsec calculation into separate function

We need to calculate the same number in the clocksource code and
the sched_clock code, so extract this code into its own function.
We also drop the min_t and just use min() because the two types
are the same.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index dbbf8aa7731b..67301a405712 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -292,6 +292,8 @@ extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
+extern u64
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
-- 
cgit v1.2.3


From e7e3ff1bfe9c42ee31172e9afdc0383a9e595e29 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:17 -0700
Subject: sched_clock: Add support for >32 bit sched_clock

The ARM architected system counter has at least 56 usable bits.
Add support for counters with more than 32 bits to the generic
sched_clock implementation so we can increase the time between
wakeups due to dealing with wrap-around on these devices while
benefiting from the irqtime accounting and suspend/resume
handling that the generic sched_clock code already has. On my
system using 56 bits over 32 bits changes the wraparound time
from a few minutes to an hour. For faster running counters (GHz
range) this is even more important because we may not be able to
execute the timer in time to deal with the wraparound if only 32
bits are used.

We choose a maxsec value of 3600 seconds because we assume no
system will go idle for more than an hour. In the future we may
need to increase this value.

Note: All users should switch over to the 64-bit read function so
we can remove setup_sched_clock() in favor of sched_clock_register().

Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/sched_clock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index fa7922c80a41..eca7abeb86fc 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -15,6 +15,8 @@ static inline void sched_clock_postinit(void) { }
 #endif
 
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+extern void sched_clock_register(u64 (*read)(void), int bits,
+				 unsigned long rate);
 
 extern unsigned long long (*sched_clock_func)(void);
 
-- 
cgit v1.2.3


From 272686bf46a34f86d270cf192f68769667792026 Mon Sep 17 00:00:00 2001
From: Leif Lindholm <leif.lindholm@linaro.org>
Date: Thu, 5 Sep 2013 11:34:54 +0100
Subject: efi: x86: ia64: provide a generic efi_config_init()

Common to (U)EFI support on all platforms is the global "efi" data
structure, and the code that parses the System Table to locate
addresses to populate that structure with.

This patch adds both of these to the global EFI driver code and
removes the local definition of the global "efi" data structure from
the x86 and ia64 code.

Squashed into one big patch to avoid breaking bisection.

Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f8f176154f7..09d9e4212799 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -404,6 +404,12 @@ typedef struct {
 	unsigned long table;
 } efi_config_table_t;
 
+typedef struct {
+	efi_guid_t guid;
+	const char *name;
+	unsigned long *ptr;
+} efi_config_table_type_t;
+
 #define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
 
 #define EFI_2_30_SYSTEM_TABLE_REVISION  ((2 << 16) | (30))
@@ -587,6 +593,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
 }
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+extern int efi_config_init(efi_config_table_type_t *arch_tables);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
-- 
cgit v1.2.3


From 258f6fd738221766b512cd8c7120563b78d62829 Mon Sep 17 00:00:00 2001
From: Leif Lindholm <leif.lindholm@linaro.org>
Date: Thu, 5 Sep 2013 11:34:55 +0100
Subject: efi: x86: make efi_lookup_mapped_addr() a common function

efi_lookup_mapped_addr() is a handy utility for other platforms than
x86. Move it from arch/x86 to drivers/firmware. Add memmap pointer
to global efi structure, and initialise it on x86.

Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 09d9e4212799..c084b6d942c3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -558,6 +558,7 @@ extern struct efi {
 	efi_get_next_high_mono_count_t *get_next_high_mono_count;
 	efi_reset_system_t *reset_system;
 	efi_set_virtual_address_map_t *set_virtual_address_map;
+	struct efi_memory_map *memmap;
 } efi;
 
 static inline int
-- 
cgit v1.2.3


From 5a523605afa7d3b54b2e7041f8c9e6bc39872a7e Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 10 Sep 2013 15:45:05 +0530
Subject: regulator: core: provide fixed voltage in desc for single voltage
 rail

If a given rail has a single voltage (n_voltages = 1) then provide the
rail voltage through the regulator descriptor so that the core can use
this value for finding the voltage.

This avoids the need to implement the get_voltage() or list_voltage()
callbacks in the regulator driver.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 67e13aa5a478..9e8241a9f28f 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -207,6 +207,7 @@ enum regulator_type {
  * @min_uV: Voltage given by the lowest selector (if linear mapping)
  * @uV_step: Voltage increase with each selector (if linear mapping)
  * @linear_min_sel: Minimal selector for starting linear mapping
+ * @fixed_uV: Fixed voltage of rails.
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @volt_table: Voltage mapping table (if table based mapping)
  *
@@ -239,6 +240,7 @@ struct regulator_desc {
 	unsigned int min_uV;
 	unsigned int uV_step;
 	unsigned int linear_min_sel;
+	int fixed_uV;
 	unsigned int ramp_delay;
 
 	const struct regulator_linear_range *linear_ranges;
-- 
cgit v1.2.3


From fdf200290581150f7b69148abf6ca860684cbfbb Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 1 Sep 2013 20:24:50 -0700
Subject: regmap: add regmap_field_update_bits()

regmap_field currently supports read/write functions.
This patch adds a new update_bits function for it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..4c8c20a7a75d 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -448,6 +448,8 @@ void devm_regmap_field_free(struct device *dev,	struct regmap_field *field);
 
 int regmap_field_read(struct regmap_field *field, unsigned int *val);
 int regmap_field_write(struct regmap_field *field, unsigned int val);
+int regmap_field_update_bits(struct regmap_field *field,
+			     unsigned int mask, unsigned int val);
 
 /**
  * Description of an IRQ for the generic regmap irq_chip.
-- 
cgit v1.2.3


From 5ce0ba88650f2606244a761d92e2b725f4ab3583 Mon Sep 17 00:00:00 2001
From: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Date: Tue, 3 Sep 2013 13:10:26 +0900
Subject: spi: rcar: add Renesas QSPI support on RSPI

The R8A7790 has a QSPI module which is very similar to RSPI.
This patch adds QSPI support to the RSPI module.

Signed-off-by: Hiep Cao Minh <cm-hiep@jinso.co.jp>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/rspi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h
index 900f0e328235..a25bd6f65e7f 100644
--- a/include/linux/spi/rspi.h
+++ b/include/linux/spi/rspi.h
@@ -26,6 +26,8 @@ struct rspi_plat_data {
 	unsigned int dma_rx_id;
 
 	unsigned dma_width_16bit:1;	/* DMAC read/write width = 16-bit */
+
+	u16 num_chipselect;
 };
 
 #endif
-- 
cgit v1.2.3


From b33e46bcdc4e598d738ed12a5a7906be4e11d786 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 31 Aug 2013 11:58:26 +0100
Subject: regulator: core: Provide managed regulator registration

Many regulator drivers have a remove function that consists solely of
calling regulator_unregister() so provide a devm_regulator_register()
in order to allow this repeated code to be removed and help eliminate
error handling code.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 67e13aa5a478..8474c7f88745 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -334,7 +334,12 @@ struct regulator_dev {
 struct regulator_dev *
 regulator_register(const struct regulator_desc *regulator_desc,
 		   const struct regulator_config *config);
+struct regulator_dev *
+devm_regulator_register(struct device *dev,
+			const struct regulator_desc *regulator_desc,
+			const struct regulator_config *config);
 void regulator_unregister(struct regulator_dev *rdev);
+void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev);
 
 int regulator_notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data);
-- 
cgit v1.2.3


From 4f0ac6dabf867095b31f851ba0d0ceaca2f87e2e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Fri, 13 Sep 2013 19:51:47 +0100
Subject: regulator: core: Remove unused regulator_use_dummy_regulator()

No boards have used this functionality and the new default of providing
dummy regulators by default provides a better solution to the problem it
was trying to solve.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/machine.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 999b20ce06cf..a9f7c55a4d4d 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -193,15 +193,10 @@ int regulator_suspend_finish(void);
 
 #ifdef CONFIG_REGULATOR
 void regulator_has_full_constraints(void);
-void regulator_use_dummy_regulator(void);
 #else
 static inline void regulator_has_full_constraints(void)
 {
 }
-
-static inline void regulator_use_dummy_regulator(void)
-{
-}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 666d5b4c742ba666eb68b467d777b7862f362ae5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 31 Aug 2013 18:50:52 +0100
Subject: spi: core: Add devm_spi_register_master()

Help simplify the cleanup code for SPI master drivers by providing a
managed master registration function, ensuring that the master is
automatically unregistered whenever the device is unbound.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/spi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..4d634d66ba0b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -434,6 +434,8 @@ extern struct spi_master *
 spi_alloc_master(struct device *host, unsigned size);
 
 extern int spi_register_master(struct spi_master *master);
+extern int devm_spi_register_master(struct device *dev,
+				    struct spi_master *master);
 extern void spi_unregister_master(struct spi_master *master);
 
 extern struct spi_master *spi_busnum_to_master(u16 busnum);
-- 
cgit v1.2.3


From 4b08478422040ae8cb11acc15d51f1cdb0ac39c8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 30 Aug 2013 06:01:49 -0700
Subject: Drop support for Renesas H8/300 (h8300) architecture

H8/300 has been dead for several years, and the kernel for it
has not compiled for ages. Drop support for it.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/serial_sci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index d34049712a4d..50fe651da965 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -5,7 +5,7 @@
 #include <linux/sh_dma.h>
 
 /*
- * Generic header for SuperH (H)SCI(F) (used by sh/sh64/h8300 and related parts)
+ * Generic header for SuperH (H)SCI(F) (used by sh/sh64 and related parts)
  */
 
 #define SCIx_NOT_SUPPORTED	(-1)
-- 
cgit v1.2.3


From a0102375ee82db1e08324b1a21484854cf2c1677 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 1 Sep 2013 20:30:50 -0700
Subject: regmap: Add regmap_fields APIs

The Linux kernel currently supports the regmap_field method,
which is a very useful feature.
It needs one regmap_field for each register access.

OTOH, there are multi-port devices on the market
which have many identical registers.
The only difference between each register access is
its address offset.

The current API needs many regmap_field instances for such a device,
which is not good.
This patch adds a new regmap_fields API which can take care of
multi port/offset access via regmap.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4c8c20a7a75d..a12bea07f79e 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -425,11 +425,15 @@ bool regmap_reg_in_ranges(unsigned int reg,
  * @reg: Offset of the register within the regmap bank
  * @lsb: lsb of the register field.
  * @reg: msb of the register field.
+ * @id_size: port size if it has some ports
+ * @id_offset: address offset for each ports
  */
 struct reg_field {
 	unsigned int reg;
 	unsigned int lsb;
 	unsigned int msb;
+	unsigned int id_size;
+	unsigned int id_offset;
 };
 
 #define REG_FIELD(_reg, _lsb, _msb) {		\
@@ -451,6 +455,13 @@ int regmap_field_write(struct regmap_field *field, unsigned int val);
 int regmap_field_update_bits(struct regmap_field *field,
 			     unsigned int mask, unsigned int val);
 
+int regmap_fields_write(struct regmap_field *field, unsigned int id,
+			unsigned int val);
+int regmap_fields_read(struct regmap_field *field, unsigned int id,
+		       unsigned int *val);
+int regmap_fields_update_bits(struct regmap_field *field,  unsigned int id,
+			      unsigned int mask, unsigned int val);
+
 /**
  * Description of an IRQ for the generic regmap irq_chip.
  *
-- 
cgit v1.2.3


From 00c877c69ba315d6c565a4df51c71b11e82cdeb8 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Wed, 18 Sep 2013 18:18:02 +0530
Subject: regulator: core: add support for configuring turn-on time through
 constraints

The turn-on time of the regulator depends on the regulator device's
electrical characteristics. Sometimes regulator turn-on time also
depends on the capacitive load on the given platform and it can be
more than the datasheet value.

The driver provides the enable-time as per datasheet.

Add support for configuring the enable ramp time through regulator
constraints so that regulator core can take this value for enable
time for that regulator.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/machine.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 999b20ce06cf..8108751acb86 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -95,6 +95,7 @@ struct regulator_state {
  * @initial_state: Suspend state to set by default.
  * @initial_mode: Mode to set at startup.
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
+ * @enable_time: Turn-on time of the rails (unit: microseconds)
  */
 struct regulation_constraints {
 
@@ -129,6 +130,7 @@ struct regulation_constraints {
 	unsigned int initial_mode;
 
 	unsigned int ramp_delay;
+	unsigned int enable_time;
 
 	/* constraint flags */
 	unsigned always_on:1;	/* regulator never off when system is on */
-- 
cgit v1.2.3


From e9a03add0c6ed5341fc59ff9c76843c2888a33fa Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Tue, 3 Sep 2013 16:28:59 +0800
Subject: pinctrl: ADI PIN control driver for the GPIO controller on bf54x and
 bf60x.

The new ADI GPIO2 controller was introduced since the BF548 and BF60x
processors. It differs a lot from the old one on BF5xx processors. So,
create a pinctrl driver under the pinctrl framework.

- Define gpio ports and pin interrupt controllers as individual platform
  devices.
- Register a pinctrl driver for the whole GPIO ports and pin interrupt
  devices.
- Probe pint devices before port devices. Put device instances into
  the global gpio and pint lists.
- Define peripheral, irq and gpio reservation bit masks for each gpio
  port as runtime resources.
- Save and restore gpio port and pint status MMRs in syscore PM functions.
- Create the plug-in subdrivers to hold the pinctrl soc data for bf54x
  and bf60x. Add soc data into struct adi_pinctrl. Initialize the soc data
  in pin controller probe function. Get the pin groups and functions via
  the soc data reference.
- Call gpiochip_add_pin_range() in gpio device probe function to register
  range cross reference between gpio device and pin control device.
- Get range by pinctrl_find_gpio_range_from_pin(), find gpio_port object
  by container_of() and find adi_pinctrl by pin control device name.
- Handle peripheral and gpio requests in pinctrl operation functions.
- Demux gpio IRQs via the irq_domain created by each GPIO port.

v2-changes:
- Remove unlikely() directive.

v3-changes:
- Rename struct adi_pmx to adi_pinctrl.
- Fix the comments of struct gpio_pint.
- Remove unused pin_base in struct gpio_port.
- Change pint_assign into bool type.
- Add comments about the relationship between pint device and port device
to the driver header.
- Use BIT macro to shift bit.
- Remove all bitmap reservation help functions. Inline reservation functions
into the actual code.
- Remove gpio and offset mutual reference help functions.
- Remove all help functions to find gpio_port and adi_pinctrl structs. Get
range by pinctrl_find_gpio_range_from_pin(), find gpio_port object by
container_of() and find adi_pinctrl by pin control device name.
- Pass bool type usage variable to port_setup help function.
- Separate long bit operations into several lines and add comments.
- Use debugfs to output all GPIO request information.
- Avoid to set drvdata to NULL
- Add explanation to function adi_gpio_init_int()
- Call gpiochip_add_pin_range() in gpio device probe function to register
range cross reference between gpio device and pin control device.
- Remove the reference to pin control device from the gpio_port struct.
Remove the reference list to gpio device from the adi_pinctrl struct.
Replace the global adi_pinctrl list with adi_gpio_port_list. Walk through
the gpio list to do power suspend and resume operations.
- Remove the global GPIO base from struct adi_pinctrl, define pin base in
the platform data for each GPIO port device.
- Initialize adi_pinctrl_setup in arch_initcall().
- print the status of triggers, whether it is in GPIO mode, if it is
flagged to be used as IRQ, etc in adi_pin_dbg_show().
- Create the plug-in subdrivers to hold the pinctrl soc data for bf54x
and bf60x. Add soc data into struct adi_pinctrl. Initialize the soc data
in pin controller probe function. Get the pin groups and functions via
the soc data reference.

v4-changes:
- remove useless system_state checking.
- replace dev_err with dev_warn in both irq and gpio pin cases.
- comment on relationship between irq type and invert operation.
- It is not necessary to check the reservation mode of the requested
pin in IRQ chip operation. Remove the reservation map.
- Use existing gpio/pinctrl subsystem debugfs files. Remove pinctrl-adi2
driver specific debugfs output.
- Add linkport group and function information for bf60x.
- Separate uart and ctsrts pins into 2 groups.
- Separate ATAPI and alternative ATAPI pins into 2 groups.

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/pinctrl-adi2.h | 40 ++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/linux/platform_data/pinctrl-adi2.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/pinctrl-adi2.h b/include/linux/platform_data/pinctrl-adi2.h
new file mode 100644
index 000000000000..8f91300617ec
--- /dev/null
+++ b/include/linux/platform_data/pinctrl-adi2.h
@@ -0,0 +1,40 @@
+/*
+ * Pinctrl Driver for ADI GPIO2 controller
+ *
+ * Copyright 2007-2013 Analog Devices Inc.
+ *
+ * Licensed under the GPLv2 or later
+ */
+
+
+#ifndef PINCTRL_ADI2_H
+#define PINCTRL_ADI2_H
+
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+/**
+ * struct adi_pinctrl_gpio_platform_data - Pinctrl gpio platform data
+ * for ADI GPIO2 device.
+ *
+ * @port_gpio_base: Optional global GPIO index of the GPIO bank.
+ *                 0 means driver decides.
+ * @port_pin_base: Pin index of the pin controller device.
+ * @port_width: PIN number of the GPIO bank device
+ * @pint_id: GPIO PINT device id that this GPIO bank should map to.
+ * @pint_assign: The 32-bit GPIO PINT registers can be divided into 2 parts. A
+ *               GPIO bank can be mapped into either low 16 bits[0] or high 16
+ *               bits[1] of each PINT register.
+ * @pint_map: GPIO bank mapping code in PINT device
+ */
+struct adi_pinctrl_gpio_platform_data {
+	unsigned int port_gpio_base;
+	unsigned int port_pin_base;
+	unsigned int port_width;
+	u8 pinctrl_id;	/* NOTE(review): missing from kernel-doc above */
+	u8 pint_id;
+	bool pint_assign;
+	u8 pint_map;
+};
+
+#endif
-- 
cgit v1.2.3


From 9bd721c55c8a886b938a45198aab0ccb52f1f7fa Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Fri, 13 Sep 2013 11:26:52 -0700
Subject: sched/balancing: Consider max cost of idle balance per sched domain

In this patch, we keep track of the max cost we spend doing idle load balancing
for each sched domain. If the avg time the CPU remains idle is less than the
time we have already spent on idle balancing + the max cost of idle balancing
in the sched domain, then we don't continue to attempt the balance. We also
keep a per rq variable, max_idle_balance_cost, which keeps track of the max
time spent on newidle load balances throughout all its domains so that we can
determine the avg_idle's max value.

By using the max, we avoid overrunning the average. This further reduces the
chance we attempt balancing when the CPU is not idle for longer than the cost
to balance.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 1 +
 include/linux/topology.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da36b293..be078ff9157f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,6 +810,7 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+	u64 max_newidle_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d3cf0d6e7712..e2a2c3da2929 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -106,6 +106,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -135,6 +136,7 @@ int arch_update_cpu_topology(void);
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -166,6 +168,7 @@ int arch_update_cpu_topology(void);
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
+	.max_newidle_lb_cost	= 0,					\
 }
 #endif
 
-- 
cgit v1.2.3


From f48627e686a69f5215cb0761e731edb3d9859dd9 Mon Sep 17 00:00:00 2001
From: Jason Low <jason.low2@hp.com>
Date: Fri, 13 Sep 2013 11:26:53 -0700
Subject: sched/balancing: Periodically decay max cost of idle balance

This patch builds on patch 2 and periodically decays that max value to
do idle balancing per sched domain by approximately 1% per second. Also
decay the rq's max_idle_balance_cost value.

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    | 3 +++
 include/linux/topology.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index be078ff9157f..b5344de1658b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,7 +810,10 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+
+	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index e2a2c3da2929..12ae6ce997d6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 
-- 
cgit v1.2.3


From 6112fe60ac1bd1e68da8cc4248289d6e48015f9b Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Fri, 20 Sep 2013 18:00:10 +0530
Subject: regmap: add helper macro to set min/max range of register

Add helper macro to set the min and max value of the register range.

This is useful when initialising the register ranges of the device like

static const struct regmap_range readable_ranges[] = {
	regmap_reg_range(DEVICE_REG0, DEVICE_REG10),
};

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..7d3ae2be6869 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -70,6 +70,8 @@ struct regmap_range {
 	unsigned int range_max;
 };
 
+#define regmap_reg_range(low, high) { .range_min = low, .range_max = high, }
+
 /*
  * A table of ranges including some yes ranges and some no ranges.
  * If a register belongs to a no_range, the corresponding check function
-- 
cgit v1.2.3


From 2a855b644c310d5db5a80b8816c0c7748c167977 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 23 Aug 2013 09:40:42 -0700
Subject: rcu: Make list_splice_init_rcu() account for RCU readers

The list_splice_init_rcu() function allows a list visible to RCU readers
to be spliced into another list visible to RCU readers.  This is OK,
except for the use of INIT_LIST_HEAD(), which does pointer updates
without doing anything to make those updates safe for concurrent readers.

Of course, most of the time INIT_LIST_HEAD() is being used in reader-free
contexts, such as initialization or cleanup, so it is OK for it to update
pointers in an unsafe-for-RCU-readers manner.  This commit therefore
creates an INIT_LIST_HEAD_RCU() that uses ACCESS_ONCE() to make the updates
reader-safe.  The reason that we can use ACCESS_ONCE() instead of the more
typical rcu_assign_pointer() is that list_splice_init_rcu() is updating the
pointers to reference something that is already visible to readers, so
that there is no problem with pre-initialized values.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4106721c4e5e..45a0a9e81478 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -18,6 +18,21 @@
  * be used anywhere you would want to use a list_empty_rcu().
  */
 
+/*
+ * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers
+ * @list: list to be initialized
+ *
+ * You should instead use INIT_LIST_HEAD() for normal initialization and
+ * cleanup tasks, when readers have no access to the list being initialized.
+ * However, if the list being initialized is visible to readers, you
+ * need to keep the compiler from being too mischievous.
+ */
+static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
+{
+	ACCESS_ONCE(list->next) = list;
+	ACCESS_ONCE(list->prev) = list;
+}
+
 /*
  * return the ->next pointer of a list_head in an rcu safe
  * way, we must not access it directly
@@ -191,9 +206,13 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	if (list_empty(list))
 		return;
 
-	/* "first" and "last" tracking list, so initialize it. */
+	/*
+	 * "first" and "last" tracking list, so initialize it.  RCU readers
+	 * have access to this list, so we must use INIT_LIST_HEAD_RCU()
+	 * instead of INIT_LIST_HEAD().
+	 */
 
-	INIT_LIST_HEAD(list);
+	INIT_LIST_HEAD_RCU(list);
 
 	/*
 	 * At this point, the list body still points to the source list.
-- 
cgit v1.2.3


From ed37ddffe201bfad7be3c45bc08bd65b5298adca Mon Sep 17 00:00:00 2001
From: Roy Franz <roy.franz@linaro.org>
Date: Sun, 22 Sep 2013 15:45:26 -0700
Subject: efi: Add proper definitions for some EFI function pointers.

The x86/AMD64 EFI stubs must use a call wrapper to convert between
the Linux and EFI ABIs, so void pointers are sufficient.  For ARM,
the ABIs are compatible, so we can directly invoke the function
pointers.  The functions that are used by the ARM stub are updated
to match the EFI definitions.
Also add some EFI types used by EFI functions.

Signed-off-by: Roy Franz <roy.franz@linaro.org>
Acked-by: Mark Salter <msalter@redhat.com>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 50 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index c084b6d942c3..bc5687d0f315 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -39,6 +39,8 @@
 typedef unsigned long efi_status_t;
 typedef u8 efi_bool_t;
 typedef u16 efi_char16_t;		/* UNICODE character */
+typedef u64 efi_physical_addr_t;
+typedef void *efi_handle_t;
 
 
 typedef struct {
@@ -96,6 +98,7 @@ typedef	struct {
 #define EFI_MEMORY_DESCRIPTOR_VERSION	1
 
 #define EFI_PAGE_SHIFT		12
+#define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
 
 typedef struct {
 	u32 type;
@@ -157,11 +160,13 @@ typedef struct {
 	efi_table_hdr_t hdr;
 	void *raise_tpl;
 	void *restore_tpl;
-	void *allocate_pages;
-	void *free_pages;
-	void *get_memory_map;
-	void *allocate_pool;
-	void *free_pool;
+	efi_status_t (*allocate_pages)(int, int, unsigned long,
+				       efi_physical_addr_t *);
+	efi_status_t (*free_pages)(efi_physical_addr_t, unsigned long);
+	efi_status_t (*get_memory_map)(unsigned long *, void *, unsigned long *,
+				       unsigned long *, u32 *);
+	efi_status_t (*allocate_pool)(int, unsigned long, void **);
+	efi_status_t (*free_pool)(void *);
 	void *create_event;
 	void *set_timer;
 	void *wait_for_event;
@@ -171,7 +176,7 @@ typedef struct {
 	void *install_protocol_interface;
 	void *reinstall_protocol_interface;
 	void *uninstall_protocol_interface;
-	void *handle_protocol;
+	efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
 	void *__reserved;
 	void *register_protocol_notify;
 	void *locate_handle;
@@ -181,7 +186,7 @@ typedef struct {
 	void *start_image;
 	void *exit;
 	void *unload_image;
-	void *exit_boot_services;
+	efi_status_t (*exit_boot_services)(efi_handle_t, unsigned long);
 	void *get_next_monotonic_count;
 	void *stall;
 	void *set_watchdog_timer;
@@ -494,10 +499,6 @@ typedef struct {
 	unsigned long unload;
 } efi_loaded_image_t;
 
-typedef struct {
-	u64 revision;
-	void *open_volume;
-} efi_file_io_interface_t;
 
 typedef struct {
 	u64 size;
@@ -510,20 +511,30 @@ typedef struct {
 	efi_char16_t filename[1];
 } efi_file_info_t;
 
-typedef struct {
+typedef struct _efi_file_handle {
 	u64 revision;
-	void *open;
-	void *close;
+	efi_status_t (*open)(struct _efi_file_handle *,
+			     struct _efi_file_handle **,
+			     efi_char16_t *, u64, u64);
+	efi_status_t (*close)(struct _efi_file_handle *);
 	void *delete;
-	void *read;
+	efi_status_t (*read)(struct _efi_file_handle *, unsigned long *,
+			     void *);
 	void *write;
 	void *get_position;
 	void *set_position;
-	void *get_info;
+	efi_status_t (*get_info)(struct _efi_file_handle *, efi_guid_t *,
+			unsigned long *, void *);
 	void *set_info;
 	void *flush;
 } efi_file_handle_t;
 
+typedef struct _efi_file_io_interface {
+	u64 revision;
+	int (*open_volume)(struct _efi_file_io_interface *,
+			   efi_file_handle_t **);
+} efi_file_io_interface_t;
+
 #define EFI_FILE_MODE_READ	0x0000000000000001
 #define EFI_FILE_MODE_WRITE	0x0000000000000002
 #define EFI_FILE_MODE_CREATE	0x8000000000000000
@@ -792,6 +803,13 @@ struct efivar_entry {
 	struct kobject kobj;
 };
 
+
+struct efi_simple_text_output_protocol {
+	void *reset;
+	efi_status_t (*output_string)(void *, void *);
+	void *test_string;
+};
+
 extern struct list_head efivar_sysfs_list;
 
 static inline void
-- 
cgit v1.2.3


From 3150398626466c6cc626732f60bc901d58f40677 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 12 Sep 2013 15:10:31 +0200
Subject: sched: Remove {set,clear}_need_resched

Preemption semantics are going to change, which mandates a change.

All DRM usage sites are already broken and will not be affected (much)
by this change. DRM people are aware and will remove the last few
stragglers.

For now, leave an empty stub that generates a warning; once all users
are gone we can remove this.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: airlied@linux.ie
Cc: daniel.vetter@ffwll.ch
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/n/tip-qfc1el2zvhxiyut4ai99ij4n@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/thread_info.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e7e04736802f..a629e4b23217 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,8 +104,19 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
 
-#define set_need_resched()	set_thread_flag(TIF_NEED_RESCHED)
-#define clear_need_resched()	clear_thread_flag(TIF_NEED_RESCHED)
+static inline __deprecated void set_need_resched(void)
+{
+	/*
+	 * Use of this function in deprecated.
+	 *
+	 * As of this writing there are only a few users in the DRM tree left
+	 * all of which are wrong and can be removed without causing too much
+	 * grief.
+	 *
+	 * The DRM people are aware and are working on removing the last few
+	 * instances.
+	 */
+}
 
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
-- 
cgit v1.2.3


From ea8117478918a4734586d35ff530721b682425be Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 11 Sep 2013 12:43:13 +0200
Subject: sched, idle: Fix the idle polling state logic

Mike reported that commit 7d1a9417 ("x86: Use generic idle loop")
regressed several workloads and caused excessive reschedule
interrupts.

The patch in question failed to notice that the x86 code had an
inverted sense of the polling state versus the new generic code (x86:
default polling, generic: default !polling).

Fix the two prominent x86 mwait based idle drivers and introduce a few
new generic polling helpers (fixing the wrong smp_mb__after_clear_bit
usage).

Also switch the idle routines to using tif_need_resched() which is an
immediate TIF_NEED_RESCHED test as opposed to need_resched which will
end up being slightly different.

Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: lenb@kernel.org
Cc: tglx@linutronix.de
Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h       | 78 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/thread_info.h |  2 ++
 2 files changed, 73 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b5344de1658b..e783ec52295a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2479,34 +2479,98 @@ static inline int tsk_is_polling(struct task_struct *p)
 {
 	return task_thread_info(p)->status & TS_POLLING;
 }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
 {
 	current_thread_info()->status |= TS_POLLING;
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb();
+
+	return unlikely(tif_need_resched());
 }
 #elif defined(TIF_POLLING_NRFLAG)
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
 }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
 }
 
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	__current_set_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 *
+	 * XXX: assumes set/clear bit are identical barrier wise.
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
 {
 	clear_thread_flag(TIF_POLLING_NRFLAG);
 }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	__current_clr_polling();
+
+	/*
+	 * Polling state must be visible before we test NEED_RESCHED,
+	 * paired by resched_task()
+	 */
+	smp_mb__after_clear_bit();
+
+	return unlikely(tif_need_resched());
+}
+
 #else
 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+	return unlikely(tif_need_resched());
+}
 #endif
 
 /*
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index a629e4b23217..fddbe2023a5d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -118,6 +118,8 @@ static inline __deprecated void set_need_resched(void)
 	 */
 }
 
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
 /*
  * An arch can define its own version of set_restore_sigmask() to get the
-- 
cgit v1.2.3


From 4a2b4b222743bb07fedf985b884550f2ca067ea9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:24 +0200
Subject: sched: Introduce preempt_count accessor functions

Replace the single preempt_count() 'function' that's an lvalue with
two proper functions:

 preempt_count() - returns the preempt_count value as rvalue
 preempt_count_set() - Allows setting the preempt-count value

Also provide preempt_count_ptr() as a convenience wrapper to implement
all modifying operations.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-orxrbycjozopqfhb4dxdkdvb@git.kernel.org
[ Fixed build failure. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index f5d4723cdb3d..eaac52a8fe6a 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,19 +10,32 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+static __always_inline int preempt_count(void)
+{
+	return current_thread_info()->preempt_count;
+}
+
+static __always_inline int *preempt_count_ptr(void)
+{
+	return &current_thread_info()->preempt_count;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+	*preempt_count_ptr() = pc;
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
 #else
-# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
-# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
+# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
 #endif
 
 #define inc_preempt_count() add_preempt_count(1)
 #define dec_preempt_count() sub_preempt_count(1)
 
-#define preempt_count()	(current_thread_info()->preempt_count)
-
 #ifdef CONFIG_PREEMPT
 
 asmlinkage void preempt_schedule(void);
@@ -81,9 +94,9 @@ do { \
 
 /* For debugging and tracer internals only! */
 #define add_preempt_count_notrace(val)			\
-	do { preempt_count() += (val); } while (0)
+	do { *preempt_count_ptr() += (val); } while (0)
 #define sub_preempt_count_notrace(val)			\
-	do { preempt_count() -= (val); } while (0)
+	do { *preempt_count_ptr() -= (val); } while (0)
 #define inc_preempt_count_notrace() add_preempt_count_notrace(1)
 #define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
 
-- 
cgit v1.2.3


From f27dde8deef33c9e58027df11ceab2198601d6a6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:31 +0200
Subject: sched: Add NEED_RESCHED to the preempt_count

In order to combine the preemption and need_resched test we need to
fold the need_resched information into the preempt_count value.

Since the NEED_RESCHED flag is set across CPUs this needs to be an
atomic operation; however, we very much want to avoid making
preempt_count atomic. Therefore we keep the existing TIF_NEED_RESCHED
infrastructure in place but at 3 sites test it and fold its value into
preempt_count; namely:

 - resched_task() when setting TIF_NEED_RESCHED on the current task
 - scheduler_ipi() when resched_task() sets TIF_NEED_RESCHED on a
                   remote task it follows it up with a reschedule IPI
                   and we can modify the cpu local preempt_count from
                   there.
 - cpu_idle_loop() for when resched_task() found tsk_is_polling().

We use an inverted bitmask to indicate need_resched so that a 0 means
both need_resched and !atomic.

Also remove the barrier() in preempt_enable() between
preempt_enable_no_resched() and preempt_check_resched() to avoid
having to reload the preemption value and allow the compiler to use
the flags of the previous decrement. I couldn't come up with any sane
reason for this barrier() to be there as preempt_enable_no_resched()
already has a barrier() before doing the decrement.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-7a7m5qqbn5pmwnd4wko9u6da@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/sched.h   |  7 +++++--
 2 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index eaac52a8fe6a..92e341853e4b 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -10,9 +10,19 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
+/*
+ * We use the MSB mostly because its available; see <linux/preempt_mask.h> for
+ * the other bits -- can't include that header due to inclusion hell.
+ */
+#define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
 static __always_inline int preempt_count(void)
 {
-	return current_thread_info()->preempt_count;
+	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline int *preempt_count_ptr(void)
@@ -20,11 +30,40 @@ static __always_inline int *preempt_count_ptr(void)
 	return &current_thread_info()->preempt_count;
 }
 
+/*
+ * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
+ * alternative is loosing a reschedule. Better schedule too often -- also this
+ * should be a very rare operation.
+ */
 static __always_inline void preempt_count_set(int pc)
 {
 	*preempt_count_ptr() = pc;
 }
 
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
+}
+
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
@@ -42,7 +81,7 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_check_resched() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule(); \
 } while (0)
 
@@ -52,7 +91,7 @@ void preempt_schedule_context(void);
 
 #define preempt_check_resched_context() \
 do { \
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
+	if (unlikely(!*preempt_count_ptr())) \
 		preempt_schedule_context(); \
 } while (0)
 #else
@@ -88,7 +127,6 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
-	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
@@ -116,7 +154,6 @@ do { \
 #define preempt_enable_notrace() \
 do { \
 	preempt_enable_no_resched_notrace(); \
-	barrier(); \
 	preempt_check_resched_context(); \
 } while (0)
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e783ec52295a..9fa151fb968e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
+#include <linux/preempt.h>
 
 #include <asm/page.h>
 #include <asm/ptrace.h>
@@ -434,7 +435,9 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)
+#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
+#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
@@ -2408,7 +2411,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_preempt_need_resched());
 }
 
 /*
-- 
cgit v1.2.3


From a787870924dbd6f321661e06d4ec1c7a408c9ccf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:55:40 +0200
Subject: sched, arch: Create asm/preempt.h

In order to prepare for per-arch implementations of preempt_count, move
the required bits into an asm-generic header and use this for all
archs.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-h5j0c1r3e3fk015m30h8f1zx@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 49 +------------------------------------------------
 1 file changed, 1 insertion(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 92e341853e4b..df8e245e8729 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -6,7 +6,6 @@
  * preempt_count (used for kernel preemption, interrupt count, etc.)
  */
 
-#include <linux/thread_info.h>
 #include <linux/linkage.h>
 #include <linux/list.h>
 
@@ -16,53 +15,7 @@
  */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
-/*
- * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
- * that think a non-zero value indicates we cannot preempt.
- */
-static __always_inline int preempt_count(void)
-{
-	return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline int *preempt_count_ptr(void)
-{
-	return &current_thread_info()->preempt_count;
-}
-
-/*
- * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the
- * alternative is loosing a reschedule. Better schedule too often -- also this
- * should be a very rare operation.
- */
-static __always_inline void preempt_count_set(int pc)
-{
-	*preempt_count_ptr() = pc;
-}
-
-/*
- * We fold the NEED_RESCHED bit into the preempt count such that
- * preempt_enable() can decrement and test for needing to reschedule with a
- * single instruction.
- *
- * We invert the actual bit, so that when the decrement hits 0 we know we both
- * need to resched (the bit is cleared) and can resched (no preempt count).
- */
-
-static __always_inline void set_preempt_need_resched(void)
-{
-	*preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline void clear_preempt_need_resched(void)
-{
-	*preempt_count_ptr() |= PREEMPT_NEED_RESCHED;
-}
-
-static __always_inline bool test_preempt_need_resched(void)
-{
-	return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED);
-}
+#include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
-- 
cgit v1.2.3


From bdb43806589096ac4272fe1307e789846ac08d7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 10 Sep 2013 12:15:23 +0200
Subject: sched: Extract the basic add/sub preempt_count modifiers

Rewrite the preempt_count macros in order to extract the 3 basic
preempt_count value modifiers:

  __preempt_count_add()
  __preempt_count_sub()

and the new:

  __preempt_count_dec_and_test()

And since we're at it anyway, replace the unconventional
$op_preempt_count names with the more conventional preempt_count_$op.

Since these basic operators are equivalent to the previous _notrace()
variants, do away with the _notrace() versions.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-ewbpdbupy9xpsjhg960zwbv8@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/hardirq.h |   8 ++--
 include/linux/preempt.h | 106 ++++++++++++++++++++++--------------------------
 include/linux/sched.h   |   5 ---
 include/linux/uaccess.h |   8 +---
 4 files changed, 54 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 1e041063b226..d9cf963ac832 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void);
 #define __irq_enter()					\
 	do {						\
 		account_irq_enter_time(current);	\
-		add_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_add(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
 
@@ -49,7 +49,7 @@ extern void irq_enter(void);
 	do {						\
 		trace_hardirq_exit();			\
 		account_irq_exit_time(current);		\
-		sub_preempt_count(HARDIRQ_OFFSET);	\
+		preempt_count_sub(HARDIRQ_OFFSET);	\
 	} while (0)
 
 /*
@@ -62,7 +62,7 @@ extern void irq_exit(void);
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
-		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -72,7 +72,7 @@ extern void irq_exit(void);
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
 		BUG_ON(!in_nmi());				\
-		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
+		preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
 	} while (0)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index df8e245e8729..2343d8715299 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -18,97 +18,86 @@
 #include <asm/preempt.h>
 
 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
-  extern void add_preempt_count(int val);
-  extern void sub_preempt_count(int val);
+extern void preempt_count_add(int val);
+extern void preempt_count_sub(int val);
+#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
 #else
-# define add_preempt_count(val)	do { *preempt_count_ptr() += (val); } while (0)
-# define sub_preempt_count(val)	do { *preempt_count_ptr() -= (val); } while (0)
+#define preempt_count_add(val)	__preempt_count_add(val)
+#define preempt_count_sub(val)	__preempt_count_sub(val)
+#define preempt_count_dec_and_test() __preempt_count_dec_and_test()
 #endif
 
-#define inc_preempt_count() add_preempt_count(1)
-#define dec_preempt_count() sub_preempt_count(1)
-
-#ifdef CONFIG_PREEMPT
-
-asmlinkage void preempt_schedule(void);
-
-#define preempt_check_resched() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule(); \
-} while (0)
-
-#ifdef CONFIG_CONTEXT_TRACKING
-
-void preempt_schedule_context(void);
-
-#define preempt_check_resched_context() \
-do { \
-	if (unlikely(!*preempt_count_ptr())) \
-		preempt_schedule_context(); \
-} while (0)
-#else
-
-#define preempt_check_resched_context() preempt_check_resched()
-
-#endif /* CONFIG_CONTEXT_TRACKING */
-
-#else /* !CONFIG_PREEMPT */
-
-#define preempt_check_resched()		do { } while (0)
-#define preempt_check_resched_context()	do { } while (0)
-
-#endif /* CONFIG_PREEMPT */
+#define __preempt_count_inc() __preempt_count_add(1)
+#define __preempt_count_dec() __preempt_count_sub(1)
 
+#define preempt_count_inc() preempt_count_add(1)
+#define preempt_count_dec() preempt_count_sub(1)
 
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
 do { \
-	inc_preempt_count(); \
+	preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define sched_preempt_enable_no_resched() \
 do { \
 	barrier(); \
-	dec_preempt_count(); \
+	preempt_count_dec(); \
 } while (0)
 
-#define preempt_enable_no_resched()	sched_preempt_enable_no_resched()
+#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
+#ifdef CONFIG_PREEMPT
+asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
-	preempt_enable_no_resched(); \
-	preempt_check_resched(); \
+	barrier(); \
+	if (unlikely(preempt_count_dec_and_test())) \
+		preempt_schedule(); \
 } while (0)
 
-/* For debugging and tracer internals only! */
-#define add_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() += (val); } while (0)
-#define sub_preempt_count_notrace(val)			\
-	do { *preempt_count_ptr() -= (val); } while (0)
-#define inc_preempt_count_notrace() add_preempt_count_notrace(1)
-#define dec_preempt_count_notrace() sub_preempt_count_notrace(1)
+#define preempt_check_resched() \
+do { \
+	if (should_resched()) \
+		preempt_schedule(); \
+} while (0)
+
+#else
+#define preempt_enable() preempt_enable_no_resched()
+#define preempt_check_resched() do { } while (0)
+#endif
 
 #define preempt_disable_notrace() \
 do { \
-	inc_preempt_count_notrace(); \
+	__preempt_count_inc(); \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched_notrace() \
 do { \
 	barrier(); \
-	dec_preempt_count_notrace(); \
+	__preempt_count_dec(); \
 } while (0)
 
-/* preempt_check_resched is OK to trace */
+#ifdef CONFIG_PREEMPT
+
+#ifdef CONFIG_CONTEXT_TRACKING
+asmlinkage void preempt_schedule_context(void);
+#else
+#define preempt_schedule_context() preempt_schedule()
+#endif
+
 #define preempt_enable_notrace() \
 do { \
-	preempt_enable_no_resched_notrace(); \
-	preempt_check_resched_context(); \
+	barrier(); \
+	if (unlikely(__preempt_count_dec_and_test())) \
+		preempt_schedule_context(); \
 } while (0)
+#else
+#define preempt_enable_notrace() preempt_enable_no_resched_notrace()
+#endif
 
 #else /* !CONFIG_PREEMPT_COUNT */
 
@@ -118,10 +107,11 @@ do { \
  * that can cause faults and scheduling migrate into our preempt-protected
  * region.
  */
-#define preempt_disable()		barrier()
+#define preempt_disable()			barrier()
 #define sched_preempt_enable_no_resched()	barrier()
-#define preempt_enable_no_resched()	barrier()
-#define preempt_enable()		barrier()
+#define preempt_enable_no_resched()		barrier()
+#define preempt_enable()			barrier()
+#define preempt_check_resched()			do { } while (0)
 
 #define preempt_disable_notrace()		barrier()
 #define preempt_enable_no_resched_notrace()	barrier()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9fa151fb968e..06ac17c7e639 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2409,11 +2409,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 	return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
 }
 
-static inline int need_resched(void)
-{
-	return unlikely(test_preempt_need_resched());
-}
-
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 5ca0951e1855..9d8cf056e661 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -15,7 +15,7 @@
  */
 static inline void pagefault_disable(void)
 {
-	inc_preempt_count();
+	preempt_count_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -30,11 +30,7 @@ static inline void pagefault_enable(void)
 	 * the pagefault handler again.
 	 */
 	barrier();
-	dec_preempt_count();
-	/*
-	 * make sure we do..
-	 */
-	barrier();
+	preempt_count_dec();
 	preempt_check_resched();
 }
 
-- 
cgit v1.2.3


From a233f1120c37724938f7201fe2353b2577adaaf9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 23 Sep 2013 19:04:26 +0200
Subject: sched: Prepare for per-cpu preempt_count

When using per-cpu preempt_count variables we need to save/restore the
preempt_count on context switch (into per task storage; for instance
the old thread_info::preempt_count variable) because of
PREEMPT_ACTIVE.

However, this means that on fork() the preempt_count value of the last
context switch gets copied and if we had a PREEMPT_ACTIVE switch right
before cloning a child task the child task will now too have
PREEMPT_ACTIVE set and start its life with an extra PREEMPT_ACTIVE
count.

Therefore we need to make init_task_preempt_count() unconditional;
this resets whatever preempt_count we inherited from our parent
process.

Doing so for !per-cpu implementations is harmless.

For !PREEMPT_COUNT kernels we need to be careful not to start life
with an increased preempt_count.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-4k0b7oy1rcdyzochwiixuwi9@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 06ac17c7e639..b09798b672f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -428,6 +428,14 @@ struct task_cputime {
 		.sum_exec_runtime = 0,				\
 	}
 
+#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED	(1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED	PREEMPT_ENABLED
+#endif
+
 /*
  * Disable preemption until the scheduler is running.
  * Reset by start_kernel()->sched_init()->init_idle().
@@ -435,9 +443,7 @@ struct task_cputime {
  * We include PREEMPT_ACTIVE to avoid cond_resched() from working
  * before the scheduler is active -- see should_resched().
  */
-#define INIT_PREEMPT_COUNT	(1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED)
-#define PREEMPT_ENABLED		(PREEMPT_NEED_RESCHED)
-#define PREEMPT_DISABLED	(1 + PREEMPT_NEED_RESCHED)
+#define INIT_PREEMPT_COUNT	(PREEMPT_DISABLED + PREEMPT_ACTIVE)
 
 /**
  * struct thread_group_cputimer - thread group interval timer counts
-- 
cgit v1.2.3


From 1a338ac32ca630f67df25b4a16436cccc314e997 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Aug 2013 14:51:00 +0200
Subject: sched, x86: Optimize the preempt_schedule() call

Remove the bloat of the C calling convention out of the
preempt_enable() sites by creating an ASM wrapper which allows us to
do an asm("call ___preempt_schedule") instead.

calling.h bits by Andi Kleen

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-tk7xdi1cvvxewixzke8t8le1@git.kernel.org
[ Fixed build error. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/preempt.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 2343d8715299..a3d9dc8c2c00 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -50,18 +50,17 @@ do { \
 #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
 
 #ifdef CONFIG_PREEMPT
-asmlinkage void preempt_schedule(void);
 #define preempt_enable() \
 do { \
 	barrier(); \
 	if (unlikely(preempt_count_dec_and_test())) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
 	if (should_resched()) \
-		preempt_schedule(); \
+		__preempt_schedule(); \
 } while (0)
 
 #else
@@ -83,17 +82,15 @@ do { \
 
 #ifdef CONFIG_PREEMPT
 
-#ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void preempt_schedule_context(void);
-#else
-#define preempt_schedule_context() preempt_schedule()
+#ifndef CONFIG_CONTEXT_TRACKING
+#define __preempt_schedule_context() __preempt_schedule()
 #endif
 
 #define preempt_enable_notrace() \
 do { \
 	barrier(); \
 	if (unlikely(__preempt_count_dec_and_test())) \
-		preempt_schedule_context(); \
+		__preempt_schedule_context(); \
 } while (0)
 #else
 #define preempt_enable_notrace() preempt_enable_no_resched_notrace()
-- 
cgit v1.2.3


From fd792f8fbcfa95674b6c417429f576ad1d808086 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 23 Sep 2013 19:14:32 +0100
Subject: mfd: mc13xxx: Move SPI erratum workaround into SPI I/O function

Move the workaround for double sending AUDIO_CODEC and AUDIO_DAC writes
into the SPI core, aiding refactoring to eliminate the ASoC custom I/O
functions and avoiding the extra writes for I2C.

Signed-off-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/mc13xxx.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 41ed59276c00..67c17b5a6f44 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -41,6 +41,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx,
 		unsigned int mode, unsigned int channel,
 		u8 ato, bool atox, unsigned int *sample);
 
+#define MC13783_AUDIO_RX0	36
+#define MC13783_AUDIO_RX1	37
+#define MC13783_AUDIO_TX	38
+#define MC13783_SSI_NETWORK	39
+#define MC13783_AUDIO_CODEC	40
+#define MC13783_AUDIO_DAC	41
+
 #define MC13XXX_IRQ_ADCDONE	0
 #define MC13XXX_IRQ_ADCBISDONE	1
 #define MC13XXX_IRQ_TS		2
-- 
cgit v1.2.3


From cc6783f788d8fe8b23ec6fc2762f5e8c9a418eee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Sep 2013 17:39:49 -0700
Subject: rcu: Is it safe to enter an RCU read-side critical section?

There is currently no way for kernel code to determine whether it
is safe to enter an RCU read-side critical section, in other words,
whether or not RCU is paying attention to the currently running CPU.
Given the large and increasing quantity of code shared by the idle loop
and non-idle code, this shortcoming is becoming increasingly painful.

This commit therefore adds __rcu_is_watching(), which returns true if
it is safe to enter an RCU read-side critical section on the currently
running CPU.  This function is quite fast, using only a __this_cpu_read().
However, the caller must disable preemption.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 8 ++++----
 include/linux/rcutiny.h  | 9 +++++++++
 include/linux/rcutree.h  | 2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f1f1bc39346b..a53a21a2808c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -261,6 +261,10 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 		rcu_irq_exit(); \
 	} while (0)
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
+extern int rcu_is_cpu_idle(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -297,10 +301,6 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
-extern int rcu_is_cpu_idle(void);
-#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
-
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e31005ee339e..bee665964878 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,4 +132,13 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
+static inline bool __rcu_is_watching(void)
+{
+	return !rcu_is_cpu_idle();
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 226169d1bd2b..293613dfd2a5 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -90,4 +90,6 @@ extern void exit_rcu(void);
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
+extern bool __rcu_is_watching(void);
+
 #endif /* __LINUX_RCUTREE_H */
-- 
cgit v1.2.3


From 5c173eb8bcb9c1aa888bd6d14a4cb746f3dd2420 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 13 Sep 2013 17:20:11 -0700
Subject: rcu: Consistent rcu_is_watching() naming

The old rcu_is_cpu_idle() function is just the negation of
__rcu_is_watching() with preemption disabled.  This commit therefore
renames rcu_is_cpu_idle() to rcu_is_watching(), inverting its sense.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 18 +++++++++---------
 include/linux/rcutiny.h  | 16 ++++++++++++----
 include/linux/rcutree.h  |  2 +-
 3 files changed, 22 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a53a21a2808c..39cbb889e20d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -262,7 +262,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 	} while (0)
 
 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
-extern int rcu_is_cpu_idle(void);
+extern bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
 
 /*
@@ -351,7 +351,7 @@ static inline int rcu_read_lock_held(void)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
@@ -402,7 +402,7 @@ static inline int rcu_read_lock_sched_held(void)
 
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
+	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
@@ -771,7 +771,7 @@ static inline void rcu_read_lock(void)
 	__rcu_read_lock();
 	__acquire(RCU);
 	rcu_lock_acquire(&rcu_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock() used illegally while idle");
 }
 
@@ -792,7 +792,7 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
@@ -821,7 +821,7 @@ static inline void rcu_read_lock_bh(void)
 	local_bh_disable();
 	__acquire(RCU_BH);
 	rcu_lock_acquire(&rcu_bh_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock_bh() used illegally while idle");
 }
 
@@ -832,7 +832,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock_bh() used illegally while idle");
 	rcu_lock_release(&rcu_bh_lock_map);
 	__release(RCU_BH);
@@ -857,7 +857,7 @@ static inline void rcu_read_lock_sched(void)
 	preempt_disable();
 	__acquire(RCU_SCHED);
 	rcu_lock_acquire(&rcu_sched_lock_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_lock_sched() used illegally while idle");
 }
 
@@ -875,7 +875,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
+	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock_sched() used illegally while idle");
 	rcu_lock_release(&rcu_sched_lock_map);
 	__release(RCU_SCHED);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index bee665964878..09ebcbe9fd78 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,13 +132,21 @@ static inline void rcu_scheduler_starting(void)
 }
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-#ifdef CONFIG_RCU_TRACE
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
 
-static inline bool __rcu_is_watching(void)
+static inline bool rcu_is_watching(void)
 {
-	return !rcu_is_cpu_idle();
+	return __rcu_is_watching();
 }
 
-#endif /* #ifdef CONFIG_RCU_TRACE */
+#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
+
+static inline bool rcu_is_watching(void)
+{
+	return true;
+}
+
+
+#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 293613dfd2a5..4b9c81548742 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -90,6 +90,6 @@ extern void exit_rcu(void);
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
-extern bool __rcu_is_watching(void);
+extern bool rcu_is_watching(void);
 
 #endif /* __LINUX_RCUTREE_H */
-- 
cgit v1.2.3


From 75f93fed50c2abadbab6ef546b265f51ca975b27 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 27 Sep 2013 17:30:03 +0200
Subject: sched: Revert need_resched() to look at TIF_NEED_RESCHED

Yuanhan reported a serious throughput regression in his pigz
benchmark. Using the ftrace patch I found that several idle
paths need more TLC before we can switch the generic
need_resched() over to preempt_need_resched.

The preemption paths benefit most from preempt_need_resched and
do indeed use it; all other need_resched() users don't really
care that much so reverting need_resched() back to
tif_need_resched() is the simple and safe solution.

Reported-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: lkp@linux.intel.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20130927153003.GF15690@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b09798b672f3..2ac5285db434 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2577,6 +2577,11 @@ static inline bool __must_check current_clr_polling_and_test(void)
 }
 #endif
 
+static __always_inline bool need_resched(void)
+{
+	return unlikely(tif_need_resched());
+}
+
 /*
  * Thread group CPU time accounting.
  */
-- 
cgit v1.2.3


From 7d65f4a6553203da6a22097821d151fbbe7e4956 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 5 Sep 2013 15:49:45 +0200
Subject: irq: Consolidate do_softirq() arch overriden implementations

All arch overridden implementations of do_softirq() share the following
common code: disable irqs (to avoid races with the pending check),
check if there are softirqs pending, then execute __do_softirq() on
a specific stack.

Consolidate the common parts such that archs only worry about the
stack switch.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@au1.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/interrupt.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e865b554940..c9e831dc80bc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -19,6 +19,7 @@
 
 #include <linux/atomic.h>
 #include <asm/ptrace.h>
+#include <asm/irq.h>
 
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
@@ -374,6 +375,16 @@ struct softirq_action
 
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
+
+#ifdef __ARCH_HAS_DO_SOFTIRQ
+void do_softirq_own_stack(void);
+#else
+static inline void do_softirq_own_stack(void)
+{
+	__do_softirq();
+}
+#endif
+
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
-- 
cgit v1.2.3


From 3713c0cfd06fa49729a12929a7ee8b7ad48f3c02 Mon Sep 17 00:00:00 2001
From: Soren Brinkmann <soren.brinkmann@xilinx.com>
Date: Wed, 18 Sep 2013 11:48:35 -0700
Subject: clockchips: Add FEAT_PERCPU clockevent flag

Add the flag CLOCK_EVT_FEAT_PERCPU which is supposed to be set for per
cpu clockevent devices.

Signed-off-by: Soren Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Michal Simek <michal.simek@xilinx.com>
---
 include/linux/clockchips.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 0857922e8ad0..493aa021c7a9 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -60,6 +60,7 @@ enum clock_event_mode {
  * Core shall set the interrupt affinity dynamically in broadcast mode
  */
 #define CLOCK_EVT_FEAT_DYNIRQ		0x000020
+#define CLOCK_EVT_FEAT_PERCPU		0x000040
 
 /**
  * struct clock_event_device - clock event device descriptor
-- 
cgit v1.2.3


From 05071aa864e84000759191438a4a9ff7ba2c360e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 27 Sep 2013 16:34:27 +0200
Subject: spi: Add a spi_w8r16be() helper

This patch adds a new spi_w8r16be() helper, which is similar to spi_w8r16()
except that it converts the read data word from big endian to native endianness
before returning it. The reason for introducing this new helper is that for SPI
slave devices it is quite common that the read 16 bit data word is in big
endian. So users of spi_w8r16() have to convert the result to native endianness
manually. A second reason is that in this case the endianness of the return
value of spi_w8r16() depends on its sign. If it is negative (i.e. an error code)
it is already in native endianness, if it is positive it is in big endian. The
sparse code checker doesn't like this kind of mixed endianness and special
annotations are necessary to keep it quiet (E.g. casting to be16 using __force).
Doing the conversion to native endianness in the helper function does not
require such annotations since we are not mixing different endiannesses in the
same variable.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/spi.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..0e0aebdeb56b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -823,6 +823,33 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd)
 	return (status < 0) ? status : result;
 }
 
+/**
+ * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read
+ * @spi: device with which data will be exchanged
+ * @cmd: command to be written before data is read back
+ * Context: can sleep
+ *
+ * This returns the (unsigned) sixteen bit number returned by the device in cpu
+ * endianness, or else a negative error code. Callable only from contexts that
+ * can sleep.
+ *
+ * This function is similar to spi_w8r16, with the exception that it will
+ * convert the read 16 bit data word from big-endian to native endianness.
+ *
+ */
+static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
+
+{
+	ssize_t status;
+	__be16 result;
+
+	status = spi_write_then_read(spi, &cmd, 1, &result, 2);
+	if (status < 0)
+		return status;
+
+	return be16_to_cpu(result);
+}
+
 /*---------------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3


From 8daaa5f8261bffd2f6217a960f9182d0503a5c44 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Wed, 2 Oct 2013 10:14:18 -0500
Subject: kdb: Add support for external NMI handler to call KGDB/KDB

This patch adds a kgdb_nmicallin() interface that can be used by
external NMI handlers to call the KGDB/KDB handler.  The primary
need for this is for those types of NMI interrupts where all the
CPUs have already received the NMI signal.  Therefore no
send_IPI(NMI) is required, and in fact it will cause a 2nd
unhandled NMI to occur. This generates the "Dazed and Confuzed"
messages.

Since all the CPUs are getting the NMI at roughly the same time,
it's not guaranteed that the first CPU that hits the NMI handler
will manage to enter KGDB and set the dbg_master_lock before the
slaves start entering. The new argument "send_ready" was added
for KGDB to signal the NMI handler to release the slave CPUs for
entry into KGDB.

Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Reviewed-by: Dimitri Sivanich <sivanich@sgi.com>
Reviewed-by: Hedi Berriche <hedi@sgi.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Link: http://lkml.kernel.org/r/20131002151417.928886849@asylum.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kdb.h  | 1 +
 include/linux/kgdb.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 7f6fe6e015bc..290db1269c4c 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -109,6 +109,7 @@ typedef enum {
 	KDB_REASON_RECURSE,	/* Recursive entry to kdb;
 				 * regs probably valid */
 	KDB_REASON_SSTEP,	/* Single Step trap. - regs valid */
+	KDB_REASON_SYSTEM_NMI,	/* In NMI due to SYSTEM cmd; regs valid */
 } kdb_reason_t;
 
 extern int kdb_trap_printk;
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index c6e091bf39a5..dfb4f2ffdaa2 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -310,6 +310,7 @@ extern int
 kgdb_handle_exception(int ex_vector, int signo, int err_code,
 		      struct pt_regs *regs);
 extern int kgdb_nmicallback(int cpu, void *regs);
+extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *snd_rdy);
 extern void gdbstub_exit(int status);
 
 extern int			kgdb_single_step;
-- 
cgit v1.2.3


From fdfbbd07e91f8fe387140776f3fd94605f0c89e5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 20 Sep 2013 07:40:39 -0700
Subject: perf: Add generic transaction flags

Add a generic qualifier for transaction events, as a new sample
type that returns a flag word. This is particularly useful
for qualifying aborts: to distinguish aborts which happen
due to asynchronous events (like conflicts caused by another
CPU) versus instructions that lead to an abort.

The tuning strategies are very different for those cases,
so it's important to distinguish them easily and early.

Since it's inconvenient and inflexible to filter for this
in the kernel we report all the events out and allow
some post processing in user space.

The flags are based on the Intel TSX events, but should be fairly
generic and mostly applicable to other HTM architectures too. In addition
to various flag words there's also reserved space to report a
program-supplied abort code. For TSX this is used to distinguish specific
classes of aborts, like a lock busy abort when doing lock elision.

Flags:

Elision and generic transactions 		   (ELISION vs TRANSACTION)
(HLE vs RTM on TSX; IBM etc.  would likely only use TRANSACTION)
Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC)
Retryable transaction				   (RETRY)
Conflicts with other threads			   (CONFLICT)
Transaction write capacity overflow		   (CAPACITY WRITE)
Transaction read capacity overflow		   (CAPACITY READ)

Transactions implicitly aborted can also return an abort code.
This can be used to signal specific events to the profiler. A common
case is abort on lock busy in an RTM eliding library (code 0xff).
To handle this case we include the TSX abort code.

Common example aborts in TSX would be:

- Data conflict with another thread on memory read.
                                      Flags: TRANSACTION|ASYNC|CONFLICT
- executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC
- HLE transaction in user space is too large
                                      Flags: ELISION|SYNC|CAPACITY-WRITE

The only flag that is somewhat TSX specific is ELISION.

This adds the perf core glue needed for reporting the new flag word out.

v2: Add MEM/MISC
v3: Move transaction to the end
v4: Separate capacity-read/write and remove misc
v5: Remove _SAMPLE. Move abort flags to 32bit. Rename
    transaction to txn
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8ba627c1d60..2e069d1288df 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -584,6 +584,10 @@ struct perf_sample_data {
 	struct perf_regs_user		regs_user;
 	u64				stack_user_size;
 	u64				weight;
+	/*
+	 * Transaction flags for abort events:
+	 */
+	u64				txn;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->stack_user_size = 0;
 	data->weight = 0;
 	data->data_src.val = 0;
+	data->txn = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
-- 
cgit v1.2.3


From 2f2a2b60adf368bacd6acd2116c01e32caf936c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:18 +0200
Subject: sched/wait: Make the signal_pending() checks consistent

There are two patterns to check signals in the __wait_event*() macros:

  if (!signal_pending(current)) {
	schedule();
	continue;
  }
  ret = -ERESTARTSYS;
  break;

And the more natural:

  if (signal_pending(current)) {
	ret = -ERESTARTSYS;
	break;
  }
  schedule();

Change them all into the latter form.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092527.956416254@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tty.h  | 13 ++++++-------
 include/linux/wait.h | 35 ++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 64f864651d86..050372979076 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -686,14 +686,13 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			tty_unlock(tty);					\
-			schedule();					\
-			tty_lock(tty);					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		tty_unlock(tty);					\
+		schedule();						\
+		tty_lock(tty);						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a67fc1635592..ccf0c529fd37 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -261,12 +261,11 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			schedule();					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		schedule();						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
@@ -302,14 +301,13 @@ do {									\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
-		if (!signal_pending(current)) {				\
-			ret = schedule_timeout(ret);			\
-			if (!ret)					\
-				break;					\
-			continue;					\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
 		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
+		ret = schedule_timeout(ret);				\
+		if (!ret)						\
+			break;						\
 	}								\
 	if (!ret && (condition))					\
 		ret = 1;						\
@@ -439,14 +437,13 @@ do {									\
 			finish_wait(&wq, &__wait);			\
 			break;						\
 		}							\
-		if (!signal_pending(current)) {				\
-			schedule();					\
-			continue;					\
-		}							\
-		ret = -ERESTARTSYS;					\
-		abort_exclusive_wait(&wq, &__wait, 			\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			abort_exclusive_wait(&wq, &__wait, 		\
 				TASK_INTERRUPTIBLE, NULL);		\
-		break;							\
+			break;						\
+		}							\
+		schedule();						\
 	}								\
 } while (0)
 
-- 
cgit v1.2.3


From 2953ef246b058989657e1e77b36b67566ac06f7b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:19 +0200
Subject: sched/wait: Change timeout logic

Commit 4c663cf ("wait: fix false timeouts when using
wait_event_timeout()") introduced an additional condition check after
a timeout but there are a few issues:

 - it forgot one site
 - it put the check after the main loop; not at the actual timeout
   check.

Cure both; by wrapping the condition (as suggested by Oleg), this
avoids double evaluation of 'condition' which could be quite big.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.028892896@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ccf0c529fd37..b2afd665e4ea 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -179,6 +179,14 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
+#define ___wait_cond_timeout(condition, ret)				\
+({									\
+ 	bool __cond = (condition);					\
+ 	if (__cond && !ret)						\
+ 		ret = 1;						\
+ 	__cond || !ret;							\
+})
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	DEFINE_WAIT(__wait);						\
@@ -217,14 +225,10 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		ret = schedule_timeout(ret);				\
-		if (!ret)						\
-			break;						\
 	}								\
-	if (!ret && (condition))					\
-		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -299,18 +303,14 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
 			break;						\
 		}							\
 		ret = schedule_timeout(ret);				\
-		if (!ret)						\
-			break;						\
 	}								\
-	if (!ret && (condition))					\
-		ret = 1;						\
 	finish_wait(&wq, &__wait);					\
 } while (0)
 
@@ -815,7 +815,7 @@ do {									\
 									\
 	for (;;) {							\
 		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
+		if (___wait_cond_timeout(condition, ret))		\
 			break;						\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
@@ -824,8 +824,6 @@ do {									\
 		spin_unlock_irq(&lock);					\
 		ret = schedule_timeout(ret);				\
 		spin_lock_irq(&lock);					\
-		if (!ret)						\
-			break;						\
 	}								\
 	finish_wait(&wq, &__wait);					\
 } while (0)
-- 
cgit v1.2.3


From bb632bc44970f75b66df102e831a4fc0692e9159 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:20 +0200
Subject: sched/wait: Change the wait_exclusive control flow

Purely a preparatory patch; it changes the control flow to match what
will soon be generated by generic code so that that patch can be a
unity transform.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.107994763@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index b2afd665e4ea..7d7819dafcc5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -428,23 +428,24 @@ do {									\
 
 #define __wait_event_interruptible_exclusive(wq, condition, ret)	\
 do {									\
+	__label__ __out;						\
 	DEFINE_WAIT(__wait);						\
 									\
 	for (;;) {							\
 		prepare_to_wait_exclusive(&wq, &__wait,			\
 					TASK_INTERRUPTIBLE);		\
-		if (condition) {					\
-			finish_wait(&wq, &__wait);			\
+		if (condition)						\
 			break;						\
-		}							\
 		if (signal_pending(current)) {				\
 			ret = -ERESTARTSYS;				\
 			abort_exclusive_wait(&wq, &__wait, 		\
 				TASK_INTERRUPTIBLE, NULL);		\
-			break;						\
+			goto __out;					\
 		}							\
 		schedule();						\
 	}								\
+	finish_wait(&wq, &__wait);					\
+__out:	;								\
 } while (0)
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
-- 
cgit v1.2.3


From 41a1431b178c3b731d6dfc40b987528b333dd93e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:21 +0200
Subject: sched/wait: Introduce ___wait_event()

There's far too much duplication in the __wait_event macros; in order
to fix this, introduce ___wait_event(), a macro with the capability to
replace most of the other macros.

With the previous patches having changed the various __wait_event*()
implementations to be more uniform, we can now collapse the lot
without also changing the generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.181897111@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 7d7819dafcc5..29d0249e03ab 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -187,6 +187,42 @@ wait_queue_head_t *bit_waitqueue(void *, int);
  	__cond || !ret;							\
 })
 
+#define ___wait_signal_pending(state)					\
+	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
+	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
+
+#define ___wait_nop_ret		int ret __always_unused
+
+#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
+do {									\
+	__label__ __out;						\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		if (exclusive)						\
+			prepare_to_wait_exclusive(&wq, &__wait, state); \
+		else							\
+			prepare_to_wait(&wq, &__wait, state);		\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_signal_pending(state)) {			\
+			ret = -ERESTARTSYS;				\
+			if (exclusive) {				\
+				abort_exclusive_wait(&wq, &__wait, 	\
+						     state, NULL); 	\
+				goto __out;				\
+			}						\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_wait(&wq, &__wait);					\
+__out:	;								\
+} while (0)
+
 #define __wait_event(wq, condition) 					\
 do {									\
 	DEFINE_WAIT(__wait);						\
-- 
cgit v1.2.3


From 854267f4384243b19c03a2942e84f06f2beb0952 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:22 +0200
Subject: sched/wait: Collapse __wait_event()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.254863348@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 29d0249e03ab..68e3a628e157 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -224,17 +224,8 @@ __out:	;								\
 } while (0)
 
 #define __wait_event(wq, condition) 					\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
+		      ___wait_nop_ret, schedule())
 
 /**
  * wait_event - sleep until a condition gets true
-- 
cgit v1.2.3


From ddc1994b8217527e1818f690f17597fc9cedf81b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:23 +0200
Subject: sched/wait: Collapse __wait_event_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.325264677@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 68e3a628e157..546b94efc82e 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -247,17 +247,9 @@ do {									\
 } while (0)
 
 #define __wait_event_timeout(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		ret = schedule_timeout(ret);				\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, ___wait_cond_timeout(condition, ret), 	\
+		      TASK_UNINTERRUPTIBLE, 0, ret,			\
+		      ret = schedule_timeout(ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
-- 
cgit v1.2.3


From f13f4c41c9cf9cd61c896e46e4e7ba2687e2af9c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:24 +0200
Subject: sched/wait: Collapse __wait_event_interruptible()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.396949919@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 546b94efc82e..39e4bbd2c735 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -277,21 +277,8 @@ do {									\
 })
 
 #define __wait_event_interruptible(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+		      schedule())
 
 /**
  * wait_event_interruptible - sleep until a condition gets true
-- 
cgit v1.2.3


From c2ebb1fb4eddf3d1d66fe31d1e89e83ee211b81c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:25 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.469616907@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 39e4bbd2c735..a79fb15c1dd4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -304,21 +304,9 @@ do {									\
 })
 
 #define __wait_event_interruptible_timeout(wq, condition, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		ret = schedule_timeout(ret);				\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, ___wait_cond_timeout(condition, ret),		\
+		      TASK_INTERRUPTIBLE, 0, ret,			\
+		      ret = schedule_timeout(ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
-- 
cgit v1.2.3


From 48c2521717b39cb6904941ec2847d9775669207a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:26 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_exclusive()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.541716442@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a79fb15c1dd4..c4ab172daac0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -421,26 +421,8 @@ do {									\
 })
 
 #define __wait_event_interruptible_exclusive(wq, condition, ret)	\
-do {									\
-	__label__ __out;						\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait_exclusive(&wq, &__wait,			\
-					TASK_INTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			abort_exclusive_wait(&wq, &__wait, 		\
-				TASK_INTERRUPTIBLE, NULL);		\
-			goto __out;					\
-		}							\
-		schedule();						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-__out:	;								\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, ret,	\
+		      schedule())
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
 ({									\
-- 
cgit v1.2.3


From 13cb5042a4b80396f77cf5d599d2c002c57b89dc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:27 +0200
Subject: sched/wait: Collapse __wait_event_lock_irq()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.612813379@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c4ab172daac0..d64918e09e3c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -624,20 +624,12 @@ do {									\
 
 
 #define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
+		      ___wait_nop_ret,					\
+		      spin_unlock_irq(&lock);				\
+		      cmd;						\
+		      schedule();					\
+		      spin_lock_irq(&lock))
 
 /**
  * wait_event_lock_irq_cmd - sleep until a condition gets true. The
-- 
cgit v1.2.3


From 8fbd88fa1717601ef91ced49a32f24786b167065 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:28 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_lock_irq()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.686006009@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index d64918e09e3c..a577a85004ae 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -689,26 +689,12 @@ do {									\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition,		\
-					    lock, ret, cmd)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+#define __wait_event_interruptible_lock_irq(wq, condition, lock, ret, cmd) \
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	   \
+		      spin_unlock_irq(&lock);				   \
+		      cmd;						   \
+		      schedule();					   \
+		      spin_lock_irq(&lock))
 
 /**
  * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
-- 
cgit v1.2.3


From a1dc6852ac5eecdcd3122ae01703183a3e88e979 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:29 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_lock_irq_timeout()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.759956109@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a577a85004ae..5d5408b08ba5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -763,25 +763,12 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
-						    lock, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (___wait_cond_timeout(condition, ret))		\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		spin_unlock_irq(&lock);					\
-		ret = schedule_timeout(ret);				\
-		spin_lock_irq(&lock);					\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition, lock, ret) \
+	___wait_event(wq, ___wait_cond_timeout(condition, ret),		      \
+		      TASK_INTERRUPTIBLE, 0, ret,	      		      \
+		      spin_unlock_irq(&lock);				      \
+		      ret = schedule_timeout(ret);			      \
+		      spin_lock_irq(&lock));
 
 /**
  * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
-- 
cgit v1.2.3


From 0d1e1c8a430450a3ce61a842cec64f9e2a9f3b05 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:30 +0200
Subject: sched/wait: Collapse __wait_event_interruptible_tty()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.831085521@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tty.h | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 050372979076..6e803291028f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -679,23 +679,10 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 })
 
 #define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
-		if (condition)						\
-			break;						\
-		if (signal_pending(current)) {				\
-			ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
-		tty_unlock(tty);					\
-		schedule();						\
-		tty_lock(tty);						\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+			tty_unlock(tty);				\
+			schedule();					\
+			tty_lock(tty))
 
 #ifdef CONFIG_PROC_FS
 extern void proc_tty_register_driver(struct tty_driver *);
-- 
cgit v1.2.3


From cf7361fd961b6f0510572af6cf8ca3ffba07018b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:31 +0200
Subject: sched/wait: Collapse __wait_event_killable()

Reduce macro complexity by using the new ___wait_event() helper.
No change in behaviour, identical generated code.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.898691966@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5d5408b08ba5..ec3683ee0fc2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -582,22 +582,7 @@ do {									\
 
 
 #define __wait_event_killable(wq, condition, ret)			\
-do {									\
-	DEFINE_WAIT(__wait);						\
-									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, TASK_KILLABLE);		\
-		if (condition)						\
-			break;						\
-		if (!fatal_signal_pending(current)) {			\
-			schedule();					\
-			continue;					\
-		}							\
-		ret = -ERESTARTSYS;					\
-		break;							\
-	}								\
-	finish_wait(&wq, &__wait);					\
-} while (0)
+	___wait_event(wq, condition, TASK_KILLABLE, 0, ret, schedule())
 
 /**
  * wait_event_killable - sleep until a condition gets true
-- 
cgit v1.2.3


From ebdc195f2ec68576876216081035293e37318e86 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:32 +0200
Subject: sched/wait: Collapse __wait_event_hrtimeout()

While not a wholesale replacement like the others, we can still reduce
the size of __wait_event_hrtimeout() considerably by noting that the
actual core of __wait_event_hrtimeout() is identical to what
___wait_event() generates.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092528.972793648@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ec3683ee0fc2..c065e8af9749 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -337,7 +337,6 @@ do {									\
 #define __wait_event_hrtimeout(wq, condition, timeout, state)		\
 ({									\
 	int __ret = 0;							\
-	DEFINE_WAIT(__wait);						\
 	struct hrtimer_sleeper __t;					\
 									\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
@@ -348,25 +347,15 @@ do {									\
 				       current->timer_slack_ns,		\
 				       HRTIMER_MODE_REL);		\
 									\
-	for (;;) {							\
-		prepare_to_wait(&wq, &__wait, state);			\
-		if (condition)						\
-			break;						\
-		if (state == TASK_INTERRUPTIBLE &&			\
-		    signal_pending(current)) {				\
-			__ret = -ERESTARTSYS;				\
-			break;						\
-		}							\
+	___wait_event(wq, condition, state, 0, __ret,			\
 		if (!__t.task) {					\
 			__ret = -ETIME;					\
 			break;						\
 		}							\
-		schedule();						\
-	}								\
+		schedule());						\
 									\
 	hrtimer_cancel(&__t.timer);					\
 	destroy_hrtimer_on_stack(&__t.timer);				\
-	finish_wait(&wq, &__wait);					\
 	__ret;								\
 })
 
-- 
cgit v1.2.3


From 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Oct 2013 11:22:33 +0200
Subject: sched/wait: Make the __wait_event*() interface more friendly

Change all __wait_event*() implementations to match the corresponding
wait_event*() signature for convenience.

In particular, this does away with the weird 'ret' logic. Since there
are __wait_event*() users, this requires that we update them too.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131002092529.042563462@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tty.h  |  10 ++---
 include/linux/wait.h | 113 +++++++++++++++++++++++++--------------------------
 2 files changed, 60 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6e803291028f..633cac77f9f9 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -672,14 +672,14 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 #define wait_event_interruptible_tty(tty, wq, condition)		\
 ({									\
 	int __ret = 0;							\
-	if (!(condition)) {						\
-		__wait_event_interruptible_tty(tty, wq, condition, __ret);	\
-	}								\
+	if (!(condition))						\
+		__ret = __wait_event_interruptible_tty(tty, wq,		\
+						       condition);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+#define __wait_event_interruptible_tty(tty, wq, condition)		\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 			tty_unlock(tty);				\
 			schedule();					\
 			tty_lock(tty))
diff --git a/include/linux/wait.h b/include/linux/wait.h
index c065e8af9749..bd4bd7b479b6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -179,24 +179,23 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
-#define ___wait_cond_timeout(condition, ret)				\
+#define ___wait_cond_timeout(condition)					\
 ({									\
  	bool __cond = (condition);					\
- 	if (__cond && !ret)						\
- 		ret = 1;						\
- 	__cond || !ret;							\
+ 	if (__cond && !__ret)						\
+ 		__ret = 1;						\
+ 	__cond || !__ret;						\
 })
 
 #define ___wait_signal_pending(state)					\
 	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
 	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
 
-#define ___wait_nop_ret		int ret __always_unused
-
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
-do {									\
+({									\
 	__label__ __out;						\
 	DEFINE_WAIT(__wait);						\
+	long __ret = ret;						\
 									\
 	for (;;) {							\
 		if (exclusive)						\
@@ -208,7 +207,7 @@ do {									\
 			break;						\
 									\
 		if (___wait_signal_pending(state)) {			\
-			ret = -ERESTARTSYS;				\
+			__ret = -ERESTARTSYS;				\
 			if (exclusive) {				\
 				abort_exclusive_wait(&wq, &__wait, 	\
 						     state, NULL); 	\
@@ -220,12 +219,12 @@ do {									\
 		cmd;							\
 	}								\
 	finish_wait(&wq, &__wait);					\
-__out:	;								\
-} while (0)
+__out:	__ret;								\
+})
 
 #define __wait_event(wq, condition) 					\
-	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
-		      ___wait_nop_ret, schedule())
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    schedule())
 
 /**
  * wait_event - sleep until a condition gets true
@@ -246,10 +245,10 @@ do {									\
 	__wait_event(wq, condition);					\
 } while (0)
 
-#define __wait_event_timeout(wq, condition, ret)			\
-	___wait_event(wq, ___wait_cond_timeout(condition, ret), 	\
-		      TASK_UNINTERRUPTIBLE, 0, ret,			\
-		      ret = schedule_timeout(ret))
+#define __wait_event_timeout(wq, condition, timeout)			\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_timeout - sleep until a condition gets true or a timeout elapses
@@ -272,12 +271,12 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition)) 						\
-		__wait_event_timeout(wq, condition, __ret);		\
+		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible(wq, condition, ret)			\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	\
+#define __wait_event_interruptible(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
 
 /**
@@ -299,14 +298,14 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_interruptible(wq, condition, __ret);	\
+		__ret = __wait_event_interruptible(wq, condition);	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_timeout(wq, condition, ret)		\
-	___wait_event(wq, ___wait_cond_timeout(condition, ret),		\
-		      TASK_INTERRUPTIBLE, 0, ret,			\
-		      ret = schedule_timeout(ret))
+#define __wait_event_interruptible_timeout(wq, condition, timeout)	\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, 0, timeout,			\
+		      __ret = schedule_timeout(__ret))
 
 /**
  * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
@@ -330,7 +329,8 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition))						\
-		__wait_event_interruptible_timeout(wq, condition, __ret); \
+		__ret = __wait_event_interruptible_timeout(wq, 		\
+						condition, timeout);	\
 	__ret;								\
 })
 
@@ -347,7 +347,7 @@ do {									\
 				       current->timer_slack_ns,		\
 				       HRTIMER_MODE_REL);		\
 									\
-	___wait_event(wq, condition, state, 0, __ret,			\
+	__ret = ___wait_event(wq, condition, state, 0, 0,		\
 		if (!__t.task) {					\
 			__ret = -ETIME;					\
 			break;						\
@@ -409,15 +409,15 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_exclusive(wq, condition, ret)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, ret,	\
+#define __wait_event_interruptible_exclusive(wq, condition)		\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,		\
 		      schedule())
 
 #define wait_event_interruptible_exclusive(wq, condition)		\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_interruptible_exclusive(wq, condition, __ret);\
+		__ret = __wait_event_interruptible_exclusive(wq, condition);\
 	__ret;								\
 })
 
@@ -570,8 +570,8 @@ do {									\
 
 
-#define __wait_event_killable(wq, condition, ret)			\
-	___wait_event(wq, condition, TASK_KILLABLE, 0, ret, schedule())
+#define __wait_event_killable(wq, condition)				\
+	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
 /**
  * wait_event_killable - sleep until a condition gets true
@@ -592,18 +592,17 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__wait_event_killable(wq, condition, __ret);		\
+		__ret = __wait_event_killable(wq, condition);		\
 	__ret;								\
 })
 
 
 #define __wait_event_lock_irq(wq, condition, lock, cmd)			\
-	___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,		\
-		      ___wait_nop_ret,					\
-		      spin_unlock_irq(&lock);				\
-		      cmd;						\
-		      schedule();					\
-		      spin_lock_irq(&lock))
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    spin_unlock_irq(&lock);			\
+			    cmd;					\
+			    schedule();					\
+			    spin_lock_irq(&lock))
 
 /**
  * wait_event_lock_irq_cmd - sleep until a condition gets true. The
@@ -663,11 +662,11 @@ do {									\
 } while (0)
 
 
-#define __wait_event_interruptible_lock_irq(wq, condition, lock, ret, cmd) \
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret,	   \
-		      spin_unlock_irq(&lock);				   \
-		      cmd;						   \
-		      schedule();					   \
+#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,	   	\
+		      spin_unlock_irq(&lock);				\
+		      cmd;						\
+		      schedule();					\
 		      spin_lock_irq(&lock))
 
 /**
@@ -698,10 +697,9 @@ do {									\
 #define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
 ({									\
 	int __ret = 0;							\
-									\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq(wq, condition,	\
-						    lock, __ret, cmd);	\
+		__ret = __wait_event_interruptible_lock_irq(wq, 	\
+						condition, lock, cmd);	\
 	__ret;								\
 })
 
@@ -730,18 +728,18 @@ do {									\
 #define wait_event_interruptible_lock_irq(wq, condition, lock)		\
 ({									\
 	int __ret = 0;							\
-									\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq(wq, condition,	\
-						    lock, __ret, );	\
+		__ret = __wait_event_interruptible_lock_irq(wq,		\
+						condition, lock,)	\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, lock, ret) \
-	___wait_event(wq, ___wait_cond_timeout(condition, ret),		      \
-		      TASK_INTERRUPTIBLE, 0, ret,	      		      \
-		      spin_unlock_irq(&lock);				      \
-		      ret = schedule_timeout(ret);			      \
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition, 	\
+						    lock, timeout) 	\
+	___wait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, 0, ret,	      		\
+		      spin_unlock_irq(&lock);				\
+		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
 
 /**
@@ -771,11 +769,10 @@ do {									\
 #define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
 						  timeout)		\
 ({									\
-	int __ret = timeout;						\
-									\
+	long __ret = timeout;						\
 	if (!(condition))						\
-		__wait_event_interruptible_lock_irq_timeout(		\
-					wq, condition, lock, __ret);	\
+		__ret = __wait_event_interruptible_lock_irq_timeout(	\
+					wq, condition, lock, timeout);	\
 	__ret;								\
 })
 
-- 
cgit v1.2.3


From fb869b6e91a3ac235f237f73305ecf34cdc4969b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 4 Oct 2013 10:24:49 +0200
Subject: sched/wait: Clean up wait.h details a bit

Since we are changing wait.h profoundly, use the opportunity to:

 - add a sentence to explain what this file is about
 - remove whitespace noise
 - prettify weird looking line break fixup attempts
 - standardize type definition and initialization sequences
 - use consistent style details

No code is changed.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-O8dIie5swnctqpupakatvqyq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 124 +++++++++++++++++++++++++--------------------------
 1 file changed, 60 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index bd4bd7b479b6..a2726c7dd244 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_WAIT_H
 #define _LINUX_WAIT_H
-
-
+/*
+ * Linux wait queue related types and methods
+ */
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
@@ -13,27 +14,27 @@ typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, v
 int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
 
 struct __wait_queue {
-	unsigned int flags;
+	unsigned int		flags;
 #define WQ_FLAG_EXCLUSIVE	0x01
-	void *private;
-	wait_queue_func_t func;
-	struct list_head task_list;
+	void			*private;
+	wait_queue_func_t	func;
+	struct list_head	task_list;
 };
 
 struct wait_bit_key {
-	void *flags;
-	int bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR -1
+	void			*flags;
+	int			bit_nr;
+#define WAIT_ATOMIC_T_BIT_NR	-1
 };
 
 struct wait_bit_queue {
-	struct wait_bit_key key;
-	wait_queue_t wait;
+	struct wait_bit_key	key;
+	wait_queue_t		wait;
 };
 
 struct __wait_queue_head {
-	spinlock_t lock;
-	struct list_head task_list;
+	spinlock_t		lock;
+	struct list_head	task_list;
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -84,17 +85,17 @@ extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
-	q->flags = 0;
-	q->private = p;
-	q->func = default_wake_function;
+	q->flags	= 0;
+	q->private	= p;
+	q->func		= default_wake_function;
 }
 
-static inline void init_waitqueue_func_entry(wait_queue_t *q,
-					wait_queue_func_t func)
+static inline void
+init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
 {
-	q->flags = 0;
-	q->private = NULL;
-	q->func = func;
+	q->flags	= 0;
+	q->private	= NULL;
+	q->func		= func;
 }
 
 static inline int waitqueue_active(wait_queue_head_t *q)
@@ -114,8 +115,8 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
 /*
  * Used for wake-one threads:
  */
-static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
-					      wait_queue_t *wait)
+static inline void
+__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue(q, wait);
@@ -127,23 +128,22 @@ static inline void __add_wait_queue_tail(wait_queue_head_t *head,
 	list_add_tail(&new->task_list, &head->task_list);
 }
 
-static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
-					      wait_queue_t *wait)
+static inline void
+__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
 {
 	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue_tail(q, wait);
 }
 
-static inline void __remove_wait_queue(wait_queue_head_t *head,
-							wait_queue_t *old)
+static inline void
+__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 {
 	list_del(&old->task_list);
 }
 
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			void *key);
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
@@ -170,21 +170,21 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 /*
  * Wakeup macros to be used to report events to the targets.
  */
-#define wake_up_poll(x, m)				\
+#define wake_up_poll(x, m)						\
 	__wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m)				\
+#define wake_up_locked_poll(x, m)					\
 	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m)			\
+#define wake_up_interruptible_poll(x, m)				\
 	__wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
 #define wake_up_interruptible_sync_poll(x, m)				\
 	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
 
 #define ___wait_cond_timeout(condition)					\
 ({									\
- 	bool __cond = (condition);					\
- 	if (__cond && !__ret)						\
- 		__ret = 1;						\
- 	__cond || !__ret;						\
+	bool __cond = (condition);					\
+	if (__cond && !__ret)						\
+		__ret = 1;						\
+	__cond || !__ret;						\
 })
 
 #define ___wait_signal_pending(state)					\
@@ -209,8 +209,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 		if (___wait_signal_pending(state)) {			\
 			__ret = -ERESTARTSYS;				\
 			if (exclusive) {				\
-				abort_exclusive_wait(&wq, &__wait, 	\
-						     state, NULL); 	\
+				abort_exclusive_wait(&wq, &__wait,	\
+						     state, NULL);	\
 				goto __out;				\
 			}						\
 			break;						\
@@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 __out:	__ret;								\
 })
 
-#define __wait_event(wq, condition) 					\
+#define __wait_event(wq, condition)					\
 	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
 			    schedule())
 
@@ -238,9 +238,9 @@ __out:	__ret;								\
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
  */
-#define wait_event(wq, condition) 					\
+#define wait_event(wq, condition)					\
 do {									\
-	if (condition)	 						\
+	if (condition)							\
 		break;							\
 	__wait_event(wq, condition);					\
 } while (0)
@@ -270,7 +270,7 @@ do {									\
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition)) 						\
+	if (!(condition))						\
 		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
@@ -329,7 +329,7 @@ do {									\
 ({									\
 	long __ret = timeout;						\
 	if (!(condition))						\
-		__ret = __wait_event_interruptible_timeout(wq, 		\
+		__ret = __wait_event_interruptible_timeout(wq,		\
 						condition, timeout);	\
 	__ret;								\
 })
@@ -569,7 +569,6 @@ do {									\
 	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
 
 
-
 #define __wait_event_killable(wq, condition)				\
 	___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
 
@@ -663,7 +662,7 @@ do {									\
 
 
 #define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd)	\
-	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,	   	\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      spin_unlock_irq(&lock);				\
 		      cmd;						\
 		      schedule();					\
@@ -698,7 +697,7 @@ do {									\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
-		__ret = __wait_event_interruptible_lock_irq(wq, 	\
+		__ret = __wait_event_interruptible_lock_irq(wq,		\
 						condition, lock, cmd);	\
 	__ret;								\
 })
@@ -734,18 +733,18 @@ do {									\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, 	\
-						    lock, timeout) 	\
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
+						    lock, timeout)	\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, ret,	      		\
+		      TASK_INTERRUPTIBLE, 0, ret,			\
 		      spin_unlock_irq(&lock);				\
 		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
 
 /**
- * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
- *		The condition is checked under the lock. This is expected
- *		to be called with the lock taken.
+ * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
+ *		true or a timeout elapses. The condition is checked under
+ *		the lock. This is expected to be called with the lock taken.
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @lock: a locked spinlock_t, which will be released before schedule()
@@ -783,11 +782,9 @@ do {									\
  * We plan to remove these interfaces.
  */
 extern void sleep_on(wait_queue_head_t *q);
-extern long sleep_on_timeout(wait_queue_head_t *q,
-				      signed long timeout);
+extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
 extern void interruptible_sleep_on(wait_queue_head_t *q);
-extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
-					   signed long timeout);
+extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout);
 
 /*
  * Waitqueues which are removed from the waitqueue_head at wakeup time
@@ -795,8 +792,7 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
 void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
-			unsigned int mode, void *key);
+void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
@@ -842,8 +838,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
  * One uses wait_on_bit() where one is waiting for the bit to clear,
  * but has no intention of setting it.
  */
-static inline int wait_on_bit(void *word, int bit,
-				int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
 {
 	if (!test_bit(bit, word))
 		return 0;
@@ -866,8 +862,8 @@ static inline int wait_on_bit(void *word, int bit,
  * One uses wait_on_bit_lock() where one is waiting for the bit to
  * clear with the intention of setting it, and when done, clearing it.
  */
-static inline int wait_on_bit_lock(void *word, int bit,
-				int (*action)(void *), unsigned mode)
+static inline int
+wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode)
 {
 	if (!test_and_set_bit(bit, word))
 		return 0;
@@ -891,5 +887,5 @@ int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
 		return 0;
 	return out_of_line_wait_on_atomic_t(val, action, mode);
 }
-	
-#endif
+
+#endif /* _LINUX_WAIT_H */
-- 
cgit v1.2.3


From f61027426a5bc7093aa8359a411b053a35bb4b68 Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@linaro.org>
Date: Mon, 7 Oct 2013 23:12:13 -0700
Subject: clk: of: helper for determining number of parent clocks

Walks the "clocks" array of parent clock phandles and returns the
number.

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-provider.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 73bdb69f0c08..7e59253b8603 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -472,6 +472,7 @@ void of_clk_del_provider(struct device_node *np);
 struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec,
 				  void *data);
 struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
+int of_clk_get_parent_count(struct device_node *np);
 const char *of_clk_get_parent_name(struct device_node *np, int index);
 
 void of_clk_init(const struct of_device_id *matches);
-- 
cgit v1.2.3


From b726b7dfb400c937546fa91cf8523dcb1aa2fc6e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:53 +0100
Subject: Revert "mm: sched: numa: Delay PTE scanning until a task is scheduled
 on a new node"

PTE scanning and NUMA hinting fault handling is expensive so commit
5bca2303 ("mm: sched: numa: Delay PTE scanning until a task is scheduled
on a new node") deferred the PTE scan until a task had been scheduled on
another node. The problem is that in the purely shared memory case
this may never happen and no NUMA hinting fault information will be
captured. We are not ruling out the possibility that something better
can be done here but for now, this patch needs to be reverted and depend
entirely on the scan_delay to avoid punishing short-lived processes.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-16-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9851eeb6e1d..b7adf1d4310c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -428,20 +428,10 @@ struct mm_struct {
 
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
-
-	/*
-	 * The first node a task was scheduled on. If a task runs on
-	 * a different node than Make PTE Scan Go Now.
-	 */
-	int first_nid;
 #endif
 	struct uprobes_state uprobes_state;
 };
 
-/* first nid will either be a valid NID or one of these values */
-#define NUMA_PTE_SCAN_INIT	-1
-#define NUMA_PTE_SCAN_ACTIVE	-2
-
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
-- 
cgit v1.2.3


From 598f0ec0bc996e90a806ee9564af919ea5aad401 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:55 +0100
Subject: sched/numa: Set the scan rate proportional to the memory usage of the
 task being scanned

The NUMA PTE scan rate is controlled with a combination of the
numa_balancing_scan_period_min, numa_balancing_scan_period_max and
numa_balancing_scan_size. This scan rate is independent of the size
of the task and as an aside it is further complicated by the fact that
numa_balancing_scan_size controls how many pages are marked pte_numa and
not how much virtual memory is scanned.

In combination, it is almost impossible to meaningfully tune the min and
max scan periods and reasoning about performance is complex when the time
to complete a full scan is partially a function of the task's memory
size. This patch alters the semantics of the min and max tunables to be
about tuning the length of time it takes to complete a scan of a task's occupied
virtual address space. Conceptually this is a lot easier to understand. There
is a "sanity" check to ensure the scan rate is never extremely fast based on
the amount of virtual memory that should be scanned in a second. The default
of 2.5G seems arbitrary but it is to have the maximum scan rate after the
patch roughly match the maximum scan rate before the patch was applied.

On a similar note, numa_scan_period is in milliseconds and not
jiffies. Properly placed pages slow the scanning rate but adding 10 jiffies
to numa_scan_period means that the rate at which scanning slows depends on
HZ, which is confusing. Get rid of the jiffies_to_msec conversion and treat
it as ms.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-18-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2ac5285db434..fdcb4c855072 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1339,6 +1339,7 @@ struct task_struct {
 	int numa_scan_seq;
 	int numa_migrate_seq;
 	unsigned int numa_scan_period;
+	unsigned int numa_scan_period_max;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
-- 
cgit v1.2.3


From f809ca9a554dda49fb264c79e31c722e0b063ff8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:57 +0100
Subject: sched/numa: Track NUMA hinting faults on per-node basis

This patch tracks what nodes numa hinting faults were incurred on.
This information is later used to schedule a task on the node storing
the pages most frequently faulted by the task.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-20-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fdcb4c855072..a810e95bca2b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1342,6 +1342,8 @@ struct task_struct {
 	unsigned int numa_scan_period_max;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
+
+	unsigned long *numa_faults;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
-- 
cgit v1.2.3


From 688b7585d16ab57a17aa4422a3b290b3a55fa679 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:58 +0100
Subject: sched/numa: Select a preferred node with the most numa hinting faults

This patch selects a preferred node for a task to run on based on the
NUMA hinting faults. This information is later used to migrate tasks
towards the node during balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-21-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a810e95bca2b..b1fc75e7187b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1344,6 +1344,7 @@ struct task_struct {
 	struct callback_head numa_work;
 
 	unsigned long *numa_faults;
+	int numa_preferred_nid;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
-- 
cgit v1.2.3


From 745d61476ddb737aad3495fa6d9a8f8c2ee59f86 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:28:59 +0100
Subject: sched/numa: Update NUMA hinting faults once per scan

NUMA hinting fault counts and placement decisions are both recorded in the
same array which distorts the samples in an unpredictable fashion. The values
linearly accumulate during the scan and then decay creating a sawtooth-like
pattern in the per-node counts. It also means that placement decisions are
time sensitive. At best it means that it is very difficult to state that
the buffer holds a decaying average of past faulting behaviour. At worst,
it can confuse the load balancer if it sees one node with an artificially high
count due to very recent faulting activity and may create a bouncing effect.

This patch adds a second array. numa_faults stores the historical data
which is used for placement decisions. numa_faults_buffer holds the
fault activity during the current scan window. When the scan completes,
numa_faults decays and the values from numa_faults_buffer are copied
across.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-22-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1fc75e7187b..a463bc3ad437 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1343,7 +1343,20 @@ struct task_struct {
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
+	/*
+	 * Exponential decaying average of faults on a per-node basis.
+	 * Scheduling placement decisions are made based on the these counts.
+	 * The values remain static for the duration of a PTE scan
+	 */
 	unsigned long *numa_faults;
+
+	/*
+	 * numa_faults_buffer records faults per node during the current
+	 * scan window. When the scan completes, the counts in numa_faults
+	 * decay and these values are copied.
+	 */
+	unsigned long *numa_faults_buffer;
+
 	int numa_preferred_nid;
 #endif /* CONFIG_NUMA_BALANCING */
 
-- 
cgit v1.2.3


From 3a7053b3224f4a8b0e8184166190076593621617 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:00 +0100
Subject: sched/numa: Favour moving tasks towards the preferred node

This patch favours moving tasks towards the NUMA node that recorded a higher
number of NUMA faults during active load balancing.  Ideally this is
self-reinforcing as the longer the task runs on that node, the more faults
it should incur causing task_numa_placement to keep the task running on that
node. In reality a big weakness is that the node's CPUs can be overloaded
and it would be more efficient to queue tasks on an idle node and migrate
to the new node. This would require additional smarts in the balancer so
for now the balancer will simply prefer to place the task on the preferred
node for a number of PTE scans which is controlled by the
numa_balancing_settle_count sysctl. Once the settle_count number of scans
has completed the scheduler is free to place the task on an alternative node
if the load is imbalanced.

[srikar@linux.vnet.ibm.com: Fixed statistics]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
[ Tunable and use higher faults instead of preferred. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-23-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a463bc3ad437..aecdc5a18773 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -777,6 +777,7 @@ enum cpu_idle_type {
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
+#define SD_NUMA			0x4000	/* cross-node balancing */
 
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
-- 
cgit v1.2.3


From ac8e895bd260cb8bb19ade6a3abd44e7abe9a01d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:03 +0100
Subject: sched/numa: Add infrastructure for split shared/private accounting of
 NUMA hinting faults

Ideally it would be possible to distinguish between NUMA hinting faults
that are private to a task and those that are shared.  This patch prepares
infrastructure for separately accounting shared and private faults by
allocating the necessary buffers and passing in relevant information. For
now, all faults are treated as private and detection will be introduced
later.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-26-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index aecdc5a18773..d946195eec10 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1445,10 +1445,11 @@ struct task_struct {
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
 extern void set_numabalancing_state(bool enabled);
 #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+				   bool migrated)
 {
 }
 static inline void set_numabalancing_state(bool enabled)
-- 
cgit v1.2.3


From 1bc115d87dffd1c43bdc3c9c9d1e3a51c195d18e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:05 +0100
Subject: mm: numa: Scan pages with elevated page_mapcount

Currently automatic NUMA balancing is unable to distinguish between false
shared versus private pages except by ignoring pages with an elevated
page_mapcount entirely. This avoids shared pages bouncing between the
nodes whose task is using them but that ignores quite a lot of data.

This patch kicks away the training wheels as the preparation for adding
support for identifying shared/private pages is now in place. The ordering is so
that the impact of the shared/private detection can be easily measured. Note
that the patch does not migrate shared, file-backed pages within vmas marked
VM_EXEC as these are generally shared library pages. Migrating such pages
is not beneficial as there is an expectation they are read-shared between
caches and iTLB and iCache pressure is generally low.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-28-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/migrate.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 8d3c57fdf221..f5096b58b20d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -90,11 +90,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_NUMA_BALANCING
-extern int migrate_misplaced_page(struct page *page, int node);
-extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page,
+				  struct vm_area_struct *vma, int node);
 extern bool migrate_ratelimited(int node);
 #else
-static inline int migrate_misplaced_page(struct page *page, int node)
+static inline int migrate_misplaced_page(struct page *page,
+					 struct vm_area_struct *vma, int node)
 {
 	return -EAGAIN; /* can't migrate now */
 }
-- 
cgit v1.2.3


From b795854b1fa70f6aee923ae5df74ff7afeaddcaa Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:07 +0100
Subject: sched/numa: Set preferred NUMA node based on number of private faults

Ideally it would be possible to distinguish between NUMA hinting faults that
are private to a task and those that are shared. If treated identically
there is a risk that shared pages bounce between nodes depending on
the order they are referenced by tasks. Ultimately what is desirable is
that task private pages remain local to the task while shared pages are
interleaved between sharing tasks running on different nodes to give good
average performance. This is further complicated by THP as even
applications that partition their data may not be partitioning on a huge
page boundary.

To start with, this patch assumes that multi-threaded or multi-process
applications partition their data and that in general the private accesses
are more important for cpu->memory locality in the general case. Also,
no new infrastructure is required to treat private pages properly but
interleaving for shared pages requires additional infrastructure.

To detect private accesses the pid of the last accessing task is required
but the storage requirements are high. This patch borrows heavily from
Ingo Molnar's patch "numa, mm, sched: Implement last-CPU+PID hash tracking"
to encode some bits from the last accessing task in the page flags as
well as the node information. Collisions will occur but it is better than
just depending on the node information. Node information is then used to
determine if a page needs to migrate. The PID information is used to detect
private/shared accesses. The preferred NUMA node is selected based on where
the maximum number of approximately private faults were measured. Shared
faults are not taken into consideration for a few reasons.

First, if there are many tasks sharing the page then they'll all move
towards the same node. The node will be compute overloaded and then
scheduled away later only to bounce back again. Alternatively the shared
tasks would just bounce around nodes because the fault information is
effectively noise. Either way accounting for shared faults the same as
private faults can result in lower performance overall.

The second reason is based on a hypothetical workload that has a small
number of very important, heavily accessed private pages but a large shared
array. The shared array would dominate the number of faults and be selected
as a preferred node even though it's the wrong decision.

The third reason is that multiple threads in a process will race each
other to fault the shared page making the fault information unreliable.

Signed-off-by: Mel Gorman <mgorman@suse.de>
[ Fix compilation error when !NUMA_BALANCING. ]
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-30-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h                | 89 +++++++++++++++++++++++++++++----------
 include/linux/mm_types.h          |  4 +-
 include/linux/page-flags-layout.h | 28 +++++++-----
 3 files changed, 86 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..bb412ce2a8b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NIDPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NID_PGOFF		(ZONES_PGOFF - LAST_NID_WIDTH)
+#define LAST_NIDPID_PGOFF	(ZONES_PGOFF - LAST_NIDPID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NID_PGSHIFT	(LAST_NID_PGOFF * (LAST_NID_WIDTH != 0))
+#define LAST_NIDPID_PGSHIFT	(LAST_NIDPID_PGOFF * (LAST_NIDPID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NID_MASK		((1UL << LAST_NID_WIDTH) - 1)
+#define LAST_NIDPID_MASK	((1UL << LAST_NIDPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
@@ -661,48 +661,93 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int nid_pid_to_nidpid(int nid, int pid)
 {
-	return xchg(&page->_last_nid, nid);
+	return ((nid & LAST__NID_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int nidpid_to_pid(int nidpid)
 {
-	return page->_last_nid;
+	return nidpid & LAST__PID_MASK;
 }
-static inline void page_nid_reset_last(struct page *page)
+
+static inline int nidpid_to_nid(int nidpid)
+{
+	return (nidpid >> LAST__PID_SHIFT) & LAST__NID_MASK;
+}
+
+static inline bool nidpid_pid_unset(int nidpid)
+{
+	return nidpid_to_pid(nidpid) == (-1 & LAST__PID_MASK);
+}
+
+static inline bool nidpid_nid_unset(int nidpid)
 {
-	page->_last_nid = -1;
+	return nidpid_to_nid(nidpid) == (-1 & LAST__NID_MASK);
+}
+
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+static inline int page_nidpid_xchg_last(struct page *page, int nid)
+{
+	return xchg(&page->_last_nidpid, nid);
+}
+
+static inline int page_nidpid_last(struct page *page)
+{
+	return page->_last_nidpid;
+}
+static inline void page_nidpid_reset_last(struct page *page)
+{
+	page->_last_nidpid = -1;
 }
 #else
-static inline int page_nid_last(struct page *page)
+static inline int page_nidpid_last(struct page *page)
 {
-	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
+	return (page->flags >> LAST_NIDPID_PGSHIFT) & LAST_NIDPID_MASK;
 }
 
-extern int page_nid_xchg_last(struct page *page, int nid);
+extern int page_nidpid_xchg_last(struct page *page, int nidpid);
 
-static inline void page_nid_reset_last(struct page *page)
+static inline void page_nidpid_reset_last(struct page *page)
 {
-	int nid = (1 << LAST_NID_SHIFT) - 1;
+	int nidpid = (1 << LAST_NIDPID_SHIFT) - 1;
 
-	page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-	page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+	page->flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
+	page->flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
 }
-#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
+#endif /* LAST_NIDPID_NOT_IN_PAGE_FLAGS */
 #else
-static inline int page_nid_xchg_last(struct page *page, int nid)
+static inline int page_nidpid_xchg_last(struct page *page, int nidpid)
 {
 	return page_to_nid(page);
 }
 
-static inline int page_nid_last(struct page *page)
+static inline int page_nidpid_last(struct page *page)
 {
 	return page_to_nid(page);
 }
 
-static inline void page_nid_reset_last(struct page *page)
+static inline int nidpid_to_nid(int nidpid)
+{
+	return -1;
+}
+
+static inline int nidpid_to_pid(int nidpid)
+{
+	return -1;
+}
+
+static inline int nid_pid_to_nidpid(int nid, int pid)
+{
+	return -1;
+}
+
+static inline bool nidpid_pid_unset(int nidpid)
+{
+	return 1;
+}
+
+static inline void page_nidpid_reset_last(struct page *page)
 {
 }
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b7adf1d4310c..38a902a6d1e3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,8 +174,8 @@ struct page {
 	void *shadow;
 #endif
 
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-	int _last_nid;
+#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
+	int _last_nidpid;
 #endif
 }
 /*
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 93506a114034..02bc9184f16b 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -38,10 +38,10 @@
  * The last is when there is insufficient space in page->flags and a separate
  * lookup is necessary.
  *
- * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |          ... | FLAGS |
- *         " plus space for last_nid: |       NODE     | ZONE | LAST_NID ... | FLAGS |
- * classic sparse with space for node:| SECTION | NODE | ZONE |          ... | FLAGS |
- *         " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS |
+ * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
+ *      " plus space for last_nidpid: |       NODE     | ZONE | LAST_NIDPID ... | FLAGS |
+ * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
+ *      " plus space for last_nidpid: | SECTION | NODE | ZONE | LAST_NIDPID ... | FLAGS |
  * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
  */
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -62,15 +62,21 @@
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-#define LAST_NID_SHIFT NODES_SHIFT
+#define LAST__PID_SHIFT 8
+#define LAST__PID_MASK  ((1 << LAST__PID_SHIFT)-1)
+
+#define LAST__NID_SHIFT NODES_SHIFT
+#define LAST__NID_MASK  ((1 << LAST__NID_SHIFT)-1)
+
+#define LAST_NIDPID_SHIFT (LAST__PID_SHIFT+LAST__NID_SHIFT)
 #else
-#define LAST_NID_SHIFT 0
+#define LAST_NIDPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define LAST_NID_WIDTH LAST_NID_SHIFT
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NIDPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_NIDPID_WIDTH LAST_NIDPID_SHIFT
 #else
-#define LAST_NID_WIDTH 0
+#define LAST_NIDPID_WIDTH 0
 #endif
 
 /*
@@ -81,8 +87,8 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0
-#define LAST_NID_NOT_IN_PAGE_FLAGS
+#if defined(CONFIG_NUMA_BALANCING) && LAST_NIDPID_WIDTH == 0
+#define LAST_NIDPID_NOT_IN_PAGE_FLAGS
 #endif
 
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
-- 
cgit v1.2.3


From fc3147245d193bd0f57307859c698fa28a20b0fe Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:09 +0100
Subject: mm: numa: Limit NUMA scanning to migrate-on-fault VMAs

There is a 90% regression observed with a large Oracle performance test
on a 4 node system. Profiles indicated that the overhead was due to
contention on sp_lock when looking up shared memory policies. These
policies do not have the appropriate flags to allow them to be
automatically balanced so trapping faults on them is pointless. This
patch skips VMAs that do not have MPOL_F_MOF set.

[riel@redhat.com: Initial patch]

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-and-tested-by: Joe Mario <jmario@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-32-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mempolicy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index da6716b9e3fe..ea4d2495c646 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -136,6 +136,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 struct mempolicy *get_vma_policy(struct task_struct *tsk,
 		struct vm_area_struct *vma, unsigned long addr);
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-- 
cgit v1.2.3


From 6b9a7460b6baf6c77fc3d23d927ddfc3f3f05bf3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:11 +0100
Subject: sched/numa: Retry migration of tasks to CPU on a preferred node

When a preferred node is selected for a task there is an attempt to migrate
the task to a CPU there. This may fail in which case the task will only
migrate if the active load balancer takes action. This may never happen if
the conditions are not right. This patch will check at NUMA hinting fault
time if another attempt should be made to migrate the task. It will only
make an attempt once every five seconds.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-34-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d946195eec10..14251a8ff2ea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1341,6 +1341,7 @@ struct task_struct {
 	int numa_migrate_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
+	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
-- 
cgit v1.2.3


From 1be0bd77c5dd7c903f46abf52f9a3650face3c1d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:15 +0100
Subject: stop_machine: Introduce stop_two_cpus()

Introduce stop_two_cpus() in order to allow controlled swapping of two
tasks. It repurposes the stop_machine() state machine but only stops
the two cpus which we can do with on-stack structures and avoid
machine wide synchronization issues.

The ordering of CPUs is important to avoid deadlocks. If unordered then
two cpus calling stop_two_cpus on each other simultaneously would attempt
to queue in the opposite order on each CPU causing an AB-BA style deadlock.
By always having the lowest number CPU doing the queueing of works, we can
guarantee that works are always queued in the same order, and deadlocks
are avoided.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[ Implemented deadlock avoidance. ]
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-38-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/stop_machine.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 3b5e910d14ca..d2abbdb8c6aa 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -28,6 +28,7 @@ struct cpu_stop_work {
 };
 
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
 void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			 struct cpu_stop_work *work_buf);
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
-- 
cgit v1.2.3


From ac66f5477239ebd3c4e2cbf2f591ef387aa09884 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:16 +0100
Subject: sched/numa: Introduce migrate_swap()

Use the new stop_two_cpus() to implement migrate_swap(), a function that
flips two tasks between their respective cpus.

I'm fairly sure there's a less crude way than employing the stop_two_cpus()
method, but everything I tried either got horribly fragile and/or complex. So
keep it simple for now.

The notable detail is how we 'migrate' tasks that aren't runnable
anymore. We'll make it appear like we migrated them before they went to
sleep. The sole difference is the previous cpu in the wakeup path, so we
override this.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-39-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 14251a8ff2ea..b6619792bb13 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1043,6 +1043,8 @@ struct task_struct {
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
+
+	int wake_cpu;
 #endif
 	int on_rq;
 
-- 
cgit v1.2.3


From 90572890d202527c366aa9489b32404e88a7c020 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:20 +0100
Subject: mm: numa: Change page last {nid,pid} into {cpu,pid}

Change the per page last fault tracking to use cpu,pid instead of
nid,pid. This will allow us to try and lookup the alternate task more
easily. Note that even though it is the cpu that is stored in the page
flags, the mpol_misplaced decision is still based on the node.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-43-git-send-email-mgorman@suse.de
[ Fixed build failure on 32-bit systems. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h                | 90 ++++++++++++++++++++++-----------------
 include/linux/mm_types.h          |  4 +-
 include/linux/page-flags-layout.h | 22 +++++-----
 3 files changed, 63 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb412ce2a8b5..ce464cd4777e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,11 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NIDPID] | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
-#define LAST_NIDPID_PGOFF	(ZONES_PGOFF - LAST_NIDPID_WIDTH)
+#define LAST_CPUPID_PGOFF	(ZONES_PGOFF - LAST_CPUPID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -595,7 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_NIDPID_PGSHIFT	(LAST_NIDPID_PGOFF * (LAST_NIDPID_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT	(LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -617,7 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_NIDPID_MASK	((1UL << LAST_NIDPID_WIDTH) - 1)
+#define LAST_CPUPID_MASK	((1UL << LAST_CPUPID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
@@ -661,96 +661,106 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
-static inline int nid_pid_to_nidpid(int nid, int pid)
+static inline int cpu_pid_to_cpupid(int cpu, int pid)
 {
-	return ((nid & LAST__NID_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
+	return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
 }
 
-static inline int nidpid_to_pid(int nidpid)
+static inline int cpupid_to_pid(int cpupid)
 {
-	return nidpid & LAST__PID_MASK;
+	return cpupid & LAST__PID_MASK;
 }
 
-static inline int nidpid_to_nid(int nidpid)
+static inline int cpupid_to_cpu(int cpupid)
 {
-	return (nidpid >> LAST__PID_SHIFT) & LAST__NID_MASK;
+	return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
 }
 
-static inline bool nidpid_pid_unset(int nidpid)
+static inline int cpupid_to_nid(int cpupid)
 {
-	return nidpid_to_pid(nidpid) == (-1 & LAST__PID_MASK);
+	return cpu_to_node(cpupid_to_cpu(cpupid));
 }
 
-static inline bool nidpid_nid_unset(int nidpid)
+static inline bool cpupid_pid_unset(int cpupid)
 {
-	return nidpid_to_nid(nidpid) == (-1 & LAST__NID_MASK);
+	return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);
 }
 
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
-static inline int page_nidpid_xchg_last(struct page *page, int nid)
+static inline bool cpupid_cpu_unset(int cpupid)
 {
-	return xchg(&page->_last_nidpid, nid);
+	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
 }
 
-static inline int page_nidpid_last(struct page *page)
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
-	return page->_last_nidpid;
+	return xchg(&page->_last_cpupid, cpupid);
 }
-static inline void page_nidpid_reset_last(struct page *page)
+
+static inline int page_cpupid_last(struct page *page)
+{
+	return page->_last_cpupid;
+}
+static inline void page_cpupid_reset_last(struct page *page)
 {
-	page->_last_nidpid = -1;
+	page->_last_cpupid = -1;
 }
 #else
-static inline int page_nidpid_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
 {
-	return (page->flags >> LAST_NIDPID_PGSHIFT) & LAST_NIDPID_MASK;
+	return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
 }
 
-extern int page_nidpid_xchg_last(struct page *page, int nidpid);
+extern int page_cpupid_xchg_last(struct page *page, int cpupid);
 
-static inline void page_nidpid_reset_last(struct page *page)
+static inline void page_cpupid_reset_last(struct page *page)
 {
-	int nidpid = (1 << LAST_NIDPID_SHIFT) - 1;
+	int cpupid = (1 << LAST_CPUPID_SHIFT) - 1;
 
-	page->flags &= ~(LAST_NIDPID_MASK << LAST_NIDPID_PGSHIFT);
-	page->flags |= (nidpid & LAST_NIDPID_MASK) << LAST_NIDPID_PGSHIFT;
+	page->flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+	page->flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
 }
-#endif /* LAST_NIDPID_NOT_IN_PAGE_FLAGS */
-#else
-static inline int page_nidpid_xchg_last(struct page *page, int nidpid)
+#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+#else /* !CONFIG_NUMA_BALANCING */
+static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline int page_nidpid_last(struct page *page)
+static inline int page_cpupid_last(struct page *page)
 {
-	return page_to_nid(page);
+	return page_to_nid(page); /* XXX */
 }
 
-static inline int nidpid_to_nid(int nidpid)
+static inline int cpupid_to_nid(int cpupid)
 {
 	return -1;
 }
 
-static inline int nidpid_to_pid(int nidpid)
+static inline int cpupid_to_pid(int cpupid)
 {
 	return -1;
 }
 
-static inline int nid_pid_to_nidpid(int nid, int pid)
+static inline int cpupid_to_cpu(int cpupid)
 {
 	return -1;
 }
 
-static inline bool nidpid_pid_unset(int nidpid)
+static inline int cpu_pid_to_cpupid(int nid, int pid)
+{
+	return -1;
+}
+
+static inline bool cpupid_pid_unset(int cpupid)
 {
 	return 1;
 }
 
-static inline void page_nidpid_reset_last(struct page *page)
+static inline void page_cpupid_reset_last(struct page *page)
 {
 }
-#endif
+#endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 38a902a6d1e3..a30f9ca66557 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,8 +174,8 @@ struct page {
 	void *shadow;
 #endif
 
-#ifdef LAST_NIDPID_NOT_IN_PAGE_FLAGS
-	int _last_nidpid;
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+	int _last_cpupid;
 #endif
 }
 /*
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 02bc9184f16b..da523661500a 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -39,9 +39,9 @@
  * lookup is necessary.
  *
  * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |             ... | FLAGS |
- *      " plus space for last_nidpid: |       NODE     | ZONE | LAST_NIDPID ... | FLAGS |
+ *      " plus space for last_cpupid: |       NODE     | ZONE | LAST_CPUPID ... | FLAGS |
  * classic sparse with space for node:| SECTION | NODE | ZONE |             ... | FLAGS |
- *      " plus space for last_nidpid: | SECTION | NODE | ZONE | LAST_NIDPID ... | FLAGS |
+ *      " plus space for last_cpupid: | SECTION | NODE | ZONE | LAST_CPUPID ... | FLAGS |
  * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
  */
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -65,18 +65,18 @@
 #define LAST__PID_SHIFT 8
 #define LAST__PID_MASK  ((1 << LAST__PID_SHIFT)-1)
 
-#define LAST__NID_SHIFT NODES_SHIFT
-#define LAST__NID_MASK  ((1 << LAST__NID_SHIFT)-1)
+#define LAST__CPU_SHIFT NR_CPUS_BITS
+#define LAST__CPU_MASK  ((1 << LAST__CPU_SHIFT)-1)
 
-#define LAST_NIDPID_SHIFT (LAST__PID_SHIFT+LAST__NID_SHIFT)
+#define LAST_CPUPID_SHIFT (LAST__PID_SHIFT+LAST__CPU_SHIFT)
 #else
-#define LAST_NIDPID_SHIFT 0
+#define LAST_CPUPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NIDPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define LAST_NIDPID_WIDTH LAST_NIDPID_SHIFT
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
-#define LAST_NIDPID_WIDTH 0
+#define LAST_CPUPID_WIDTH 0
 #endif
 
 /*
@@ -87,8 +87,8 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
-#if defined(CONFIG_NUMA_BALANCING) && LAST_NIDPID_WIDTH == 0
-#define LAST_NIDPID_NOT_IN_PAGE_FLAGS
+#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0
+#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
-- 
cgit v1.2.3


From 8c8a743c5087bac9caac8155b8f3b367e75cdd0b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:21 +0100
Subject: sched/numa: Use {cpu, pid} to create task groups for shared faults

While parallel applications tend to align their data on the cache
boundary, they tend not to align on the page or THP boundary.
Consequently tasks that partition their data can still "false-share"
pages presenting a problem for optimal NUMA placement.

This patch uses NUMA hinting faults to chain tasks together into
numa_groups. As well as storing the NID a task was running on when
accessing a page, a truncated representation of the faulting PID is
stored. If subsequent faults are from different PIDs it is reasonable
to assume that those two tasks share a page and are candidates for
being grouped together. Note that this patch makes no scheduling
decisions based on the grouping information.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-44-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h    | 11 +++++++++++
 include/linux/sched.h |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce464cd4777e..81443d557a2e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -691,6 +691,12 @@ static inline bool cpupid_cpu_unset(int cpupid)
 	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
 }
 
+static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
+{
+	return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
+}
+
+#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
@@ -760,6 +766,11 @@ static inline bool cpupid_pid_unset(int cpupid)
 static inline void page_cpupid_reset_last(struct page *page)
 {
 }
+
+static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6619792bb13..f587ded5c148 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1347,6 +1347,9 @@ struct task_struct {
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
 
+	struct list_head numa_entry;
+	struct numa_group *numa_group;
+
 	/*
 	 * Exponential decaying average of faults on a per-node basis.
 	 * Scheduling placement decisions are made based on the these counts.
-- 
cgit v1.2.3


From e29cf08b05dc0b8151d65704d96d525a9e179a6b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:22 +0100
Subject: sched/numa: Report a NUMA task group ID

It is desirable to model from userspace how the scheduler groups tasks
over time. This patch adds an ID to the numa_group and reports it via
/proc/PID/status.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-45-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f587ded5c148..b0b343b1ba64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1452,12 +1452,17 @@ struct task_struct {
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   bool migrated)
 {
 }
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+	return 0;
+}
 static inline void set_numabalancing_state(bool enabled)
 {
 }
-- 
cgit v1.2.3


From 6688cc05473b36a0a3d3971e1adf1712919b32eb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2013 11:29:24 +0100
Subject: mm: numa: Do not group on RO pages

And here's a little something to make sure not the whole world ends up
in a single group.

While we don't migrate shared executable pages, we do scan/fault on
them. And since everybody links to libc, everybody ends up in the same
group.

Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-47-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0b343b1ba64..ff543851a18a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
-				   bool migrated)
+				   int flags)
 {
 }
 static inline pid_t task_numa_group_id(struct task_struct *p)
-- 
cgit v1.2.3


From 5e1576ed0e54d419286a8096133029062b6ad456 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:26 +0100
Subject: sched/numa: Stay on the same node if CLONE_VM

A newly spawned thread inside a process should stay on the same
NUMA node as its parent. This prevents processes from being "torn"
across multiple NUMA nodes every time they spawn a new thread.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-49-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff543851a18a..8563e3dd5c0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2021,7 +2021,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p);
+extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
-- 
cgit v1.2.3


From 83e1d2cd9eabec5164afea295ff06b941ae8e4a9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:27 +0100
Subject: sched/numa: Use group fault statistics in numa placement

This patch uses the fraction of faults on a particular node for both task
and group, to figure out the best node to place a task.  If the task and
group statistics disagree on what the preferred node should be then a full
rescan will select the node with the best combined weight.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-50-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8563e3dd5c0f..724482200b83 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1356,6 +1356,7 @@ struct task_struct {
 	 * The values remain static for the duration of a PTE scan
 	 */
 	unsigned long *numa_faults;
+	unsigned long total_numa_faults;
 
 	/*
 	 * numa_faults_buffer records faults per node during the current
-- 
cgit v1.2.3


From 82727018b0d33d188e9916bcf76f18387484cb04 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:28 +0100
Subject: sched/numa: Call task_numa_free() from do_execve()

It is possible for a task in a numa group to call exec, and
have the new (unrelated) executable inherit the numa group
association from its former self.

This has the potential to break numa grouping, and is trivial
to fix.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-51-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 724482200b83..f6385107c352 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1458,6 +1458,7 @@ struct task_struct {
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
+extern void task_numa_free(struct task_struct *p);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   int flags)
@@ -1470,6 +1471,9 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
 static inline void set_numabalancing_state(bool enabled)
 {
 }
+static inline void task_numa_free(struct task_struct *p)
+{
+}
 #endif
 
 static inline struct pid *task_pid(struct task_struct *task)
-- 
cgit v1.2.3


From b32e86b4301e345611f0446265f782a229faadf6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 7 Oct 2013 11:29:30 +0100
Subject: sched/numa: Add debugging

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/1381141781-10992-53-git-send-email-mgorman@suse.de
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f6385107c352..1127a46ac3d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1366,6 +1366,7 @@ struct task_struct {
 	unsigned long *numa_faults_buffer;
 
 	int numa_preferred_nid;
+	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
 	struct rcu_head rcu;
@@ -2661,6 +2662,11 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return task_thread_info(p)->cpu;
 }
 
+static inline int task_node(const struct task_struct *p)
+{
+	return cpu_to_node(task_cpu(p));
+}
+
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
 
 #else
-- 
cgit v1.2.3


From dabe1d992414a6456e60e41f1d1ad8affc6d444d Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:34 +0100
Subject: sched/numa: Be more careful about joining numa groups

Due to the way the pid is truncated, and tasks are moved between
CPUs by the scheduler, it is possible for the current task_numa_fault
to group together tasks that do not actually share memory together.

This patch adds a few easy sanity checks to task_numa_fault, joining
tasks together if they share the same tsk->mm, or if the fault was on
a page with an elevated mapcount, in a shared VMA.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-57-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1127a46ac3d2..59f953b2e413 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1454,6 +1454,7 @@ struct task_struct {
 
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
+#define TNF_SHARED	0x04
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
-- 
cgit v1.2.3


From 04bb2f9475054298f0c67a89ca92cade42d3fe5e Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:36 +0100
Subject: sched/numa: Adjust scan rate in task_numa_placement

Adjust numa_scan_period in task_numa_placement, depending on how much
useful work the numa code can do. The more local faults there are in a
given scan window the longer the period (and hence the slower the scan rate)
during the next window. If there are excessive shared faults then the scan
period will decrease, with the amount of scaling depending on the
ratio of shared/private faults. If the preferred node changes then the
scan rate is reset to recheck if the task is properly placed.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-59-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 59f953b2e413..2292f6c1596f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1365,6 +1365,14 @@ struct task_struct {
 	 */
 	unsigned long *numa_faults_buffer;
 
+	/*
+	 * numa_faults_locality tracks if faults recorded during the last
+	 * scan window were remote/local. The task scan period is adapted
+	 * based on the locality of the faults with different weights
+	 * depending on whether they were shared or private faults
+	 */
+	unsigned long numa_faults_locality[2];
+
 	int numa_preferred_nid;
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
@@ -1455,6 +1463,7 @@ struct task_struct {
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
+#define TNF_FAULT_LOCAL	0x08
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
-- 
cgit v1.2.3


From 930aa174fcc8b0efaad102fd80f677b92f35eaa2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 7 Oct 2013 11:29:37 +0100
Subject: sched/numa: Remove the numa_balancing_scan_period_reset sysctl

With scan rate adaptions based on whether the workload has properly
converged or not there should be no need for the scan period reset
hammer. Get rid of it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-60-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h     | 3 ---
 include/linux/sched/sysctl.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a30f9ca66557..a3198e5aaf4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -420,9 +420,6 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
-	/* numa_next_reset is when the PTE scanner period will be reset */
-	unsigned long numa_next_reset;
-
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b2506e..10d16c4fbe89 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,7 +47,6 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
-- 
cgit v1.2.3


From 1e3646ffc64b232cb14a5ef01d7b98997c1b73f9 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:38 +0100
Subject: mm: numa: Revert temporarily disabling of NUMA migration

With the scan rate code working (at least for multi-instance specjbb),
the large hammer that is "sched: Do not migrate memory immediately after
switching node" can be replaced with something smarter. Revert the temporary
disabling of migration and all traces of numa_migrate_seq.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-61-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2292f6c1596f..d24f70ffddee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1340,7 +1340,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	int numa_scan_seq;
-	int numa_migrate_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
 	unsigned long numa_migrate_retry;
-- 
cgit v1.2.3


From de1c9ce6f07fec0381a39a9d0b379ea35aa1167f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Mon, 7 Oct 2013 11:29:39 +0100
Subject: sched/numa: Skip some page migrations after a shared fault

Shared faults can lead to lots of unnecessary page migrations,
slowing down the system, and causing private faults to hit the
per-pgdat migration ratelimit.

This patch adds sysctl numa_balancing_migrate_deferred, which specifies
how many shared page migrations to skip unconditionally, after each page
migration that is skipped because it is a shared fault.

This reduces the number of page migrations back and forth in
shared fault situations. It also gives a strong preference to
the tasks that are already running where most of the memory is,
and to moving the other tasks to near the memory.

Testing this with a much higher scan rate than the default
still seems to result in fewer page migrations than before.

Memory seems to be somewhat better consolidated than previously,
with multi-instance specjbb runs on a 4 node system.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-62-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d24f70ffddee..833eed55cf43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1342,6 +1342,8 @@ struct task_struct {
 	int numa_scan_seq;
 	unsigned int numa_scan_period;
 	unsigned int numa_scan_period_max;
+	int numa_preferred_nid;
+	int numa_migrate_deferred;
 	unsigned long numa_migrate_retry;
 	u64 node_stamp;			/* migration stamp  */
 	struct callback_head numa_work;
@@ -1372,7 +1374,6 @@ struct task_struct {
 	 */
 	unsigned long numa_faults_locality[2];
 
-	int numa_preferred_nid;
 	unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
@@ -1469,6 +1470,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 extern void task_numa_free(struct task_struct *p);
+
+extern unsigned int sysctl_numa_balancing_migrate_deferred;
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
 				   int flags)
-- 
cgit v1.2.3


From 915f441b6f31b1a8ee01e9263a4e2d44c434d832 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Wed, 9 Oct 2013 13:30:10 +0100
Subject: regmap: Provide asynchronous write and update bits operations

Make it easier for drivers to include single register writes in
asynchronous sequences by providing async versions of the write
and update bits operations. The update bits operations are only
likely to be effective when used with devices that have caches
but this is common enough to be useful.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..114565befbd2 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -374,6 +374,7 @@ int regmap_reinit_cache(struct regmap *map,
 			const struct regmap_config *config);
 struct regmap *dev_get_regmap(struct device *dev, const char *name);
 int regmap_write(struct regmap *map, unsigned int reg, unsigned int val);
+int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val);
 int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
@@ -387,9 +388,14 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
 int regmap_update_bits(struct regmap *map, unsigned int reg,
 		       unsigned int mask, unsigned int val);
+int regmap_update_bits_async(struct regmap *map, unsigned int reg,
+			     unsigned int mask, unsigned int val);
 int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 			     unsigned int mask, unsigned int val,
 			     bool *change);
+int regmap_update_bits_check_async(struct regmap *map, unsigned int reg,
+				   unsigned int mask, unsigned int val,
+				   bool *change);
 int regmap_get_val_bytes(struct regmap *map);
 int regmap_async_complete(struct regmap *map);
 bool regmap_can_raw_write(struct regmap *map);
@@ -527,6 +533,13 @@ static inline int regmap_write(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_write_async(struct regmap *map, unsigned int reg,
+				     unsigned int val)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_raw_write(struct regmap *map, unsigned int reg,
 				   const void *val, size_t val_len)
 {
@@ -576,6 +589,14 @@ static inline int regmap_update_bits(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_update_bits_async(struct regmap *map,
+					   unsigned int reg,
+					   unsigned int mask, unsigned int val)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_update_bits_check(struct regmap *map,
 					   unsigned int reg,
 					   unsigned int mask, unsigned int val,
@@ -585,6 +606,16 @@ static inline int regmap_update_bits_check(struct regmap *map,
 	return -EINVAL;
 }
 
+static inline int regmap_update_bits_check_async(struct regmap *map,
+						 unsigned int reg,
+						 unsigned int mask,
+						 unsigned int val,
+						 bool *change)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_get_val_bytes(struct regmap *map)
 {
 	WARN_ONCE(1, "regmap API is disabled");
-- 
cgit v1.2.3


From a8bf7527a2e17ccf1366e67f6ac728327ca34c40 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 26 Aug 2013 11:22:45 -0500
Subject: of: create unflatten_and_copy_device_tree

Several architectures using DT support built-in dtb's in the init
section. These platforms need to copy the dtb from init since the
strings are referenced after unflattening. Every arch has its own
copying routine which does the same thing. Create a common function,
unflatten_and_copy_device_tree, to copy the dtb when unflattening the
dtb.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of_fdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index a478c62a2aab..58c28a8cc257 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -118,9 +118,11 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
+extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 #else /* CONFIG_OF_FLATTREE */
 static inline void unflatten_device_tree(void) {}
+static inline void unflatten_and_copy_device_tree(void) {}
 #endif /* CONFIG_OF_FLATTREE */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From 0288ffcbfdf9b8656e7320c24caa1e4c1d498287 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 26 Aug 2013 09:47:40 -0500
Subject: of: Introduce common early_init_dt_scan

Most architectures scan all the same items early in the FDT and none
are really architecture specific. Create a common early_init_dt_scan to
unify the early scan of root, memory, and chosen nodes in the flattened
DT.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of_fdt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 58c28a8cc257..73e16511134e 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -116,6 +116,8 @@ extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
 
+extern bool early_init_dt_scan(void *params);
+
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
-- 
cgit v1.2.3


From 29eb45a9ab4839a1e9cef2bcf369b918c9c4fcad Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 30 Aug 2013 17:06:53 -0500
Subject: of: remove early_init_dt_setup_initrd_arch

All arches do essentially the same thing now for
early_init_dt_setup_initrd_arch, so it can now be removed.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mark Salter <msalter@redhat.com>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of_fdt.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 73e16511134e..b365f5ac7b54 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -96,22 +96,12 @@ extern int of_scan_flat_dt_by_path(const char *path,
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
-extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
-/*
- * If BLK_DEV_INITRD, the fdt early init code will call this function,
- * to be provided by the arch code. start and end are specified as
- * physical addresses.
- */
-#ifdef CONFIG_BLK_DEV_INITRD
-extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
-#endif
-
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
-- 
cgit v1.2.3


From b4042ceaabbd913bc5b397ddd1e396eeb312d72f Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Thu, 18 Jul 2013 16:21:19 -0700
Subject: sched_clock: Remove sched_clock_func() hook

Nobody is using sched_clock_func() anymore now that sched_clock
supports up to 64 bits. Remove the hook so that new code only
uses sched_clock_register().

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/sched_clock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index eca7abeb86fc..cddf0c2940b6 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -18,6 +18,4 @@ extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
 extern void sched_clock_register(u64 (*read)(void), int bits,
 				 unsigned long rate);
 
-extern unsigned long long (*sched_clock_func)(void);
-
 #endif
-- 
cgit v1.2.3


From 6a903a2551ef778d60ce4341722d611144251398 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 27 Aug 2013 21:41:56 -0500
Subject: of: introduce common FDT machine related functions

Introduce common of_flat_dt_match_machine and
of_flat_dt_get_machine_name functions to unify architectures' handling
of machine level model and compatible properties.

Several architectures match the root compatible string with an arch
specific list of machine descriptors duplicating the same search
algorithm. Create a common implementation with a simple architecture
specific hook to iterate over each machine's match table.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of_fdt.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b365f5ac7b54..0beaee9dac1f 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -108,11 +108,16 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 
 extern bool early_init_dt_scan(void *params);
 
+extern const char *of_flat_dt_get_machine_name(void);
+extern const void *of_flat_dt_match_machine(const void *default_match,
+		const void * (*get_next_compat)(const char * const**));
+
 /* Other Prototypes */
 extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 #else /* CONFIG_OF_FLATTREE */
+static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
 static inline void unflatten_device_tree(void) {}
 static inline void unflatten_and_copy_device_tree(void) {}
 #endif /* CONFIG_OF_FLATTREE */
-- 
cgit v1.2.3


From 25ff79443cbfa924b8df1d4a8a0fbff83816938a Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:07:11 -0500
Subject: of: implement pci_address_to_pio as weak function

Implement pci_address_to_pio as a weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of_address.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 4c2e6f26432c..f6fc6899ceae 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -52,10 +52,7 @@ extern void __iomem *of_iomap(struct device_node *device, int index);
 extern const __be32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
 
-#ifndef pci_address_to_pio
-static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
-#define pci_address_to_pio pci_address_to_pio
-#endif
+extern unsigned long pci_address_to_pio(phys_addr_t addr);
 
 extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 			struct device_node *node);
-- 
cgit v1.2.3


From 0c3f061c195ceb891067b6de9e4ecc347c4dea31 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Tue, 17 Sep 2013 10:42:50 -0500
Subject: of: implement of_node_to_nid as a weak function

Implement of_node_to_nid as a weak function to remove the dependency on
asm/prom.h. This is in preparation to make prom.h optional.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..4d294a0b8a57 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -534,13 +534,10 @@ static inline const char *of_prop_next_string(struct property *prop,
 #define of_match_node(_matches, _node)	NULL
 #endif /* CONFIG_OF */
 
-#ifndef of_node_to_nid
-static inline int of_node_to_nid(struct device_node *np)
-{
-	return numa_node_id();
-}
-
-#define of_node_to_nid of_node_to_nid
+#if defined(CONFIG_OF) && defined(CONFIG_NUMA)
+extern int of_node_to_nid(struct device_node *np);
+#else
+static inline int of_node_to_nid(struct device_node *device) { return 0; }
 #endif
 
 /**
-- 
cgit v1.2.3


From 4acf4b9cd4534aaa9102004937e1ba79da01d008 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 16 Sep 2013 21:03:24 -0500
Subject: of: move of_address_to_resource and of_iomap declarations from sparc

Move of_address_to_resource and of_iomap declarations to common code. These
only differ on sparc, but the declarations are the same and don't need to
be in arch header.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
---
 include/linux/of_address.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index f6fc6899ceae..e8a179773a1a 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -60,13 +60,6 @@ extern struct of_pci_range *of_pci_range_parser_one(
 					struct of_pci_range_parser *parser,
 					struct of_pci_range *range);
 #else /* CONFIG_OF_ADDRESS */
-#ifndef of_address_to_resource
-static inline int of_address_to_resource(struct device_node *dev, int index,
-					 struct resource *r)
-{
-	return -EINVAL;
-}
-#endif
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
 					const struct of_device_id *matches,
@@ -74,12 +67,7 @@ static inline struct device_node *of_find_matching_node_by_address(
 {
 	return NULL;
 }
-#ifndef of_iomap
-static inline void __iomem *of_iomap(struct device_node *device, int index)
-{
-	return NULL;
-}
-#endif
+
 static inline const __be32 *of_get_address(struct device_node *dev, int index,
 					u64 *size, unsigned int *flags)
 {
@@ -100,6 +88,22 @@ static inline struct of_pci_range *of_pci_range_parser_one(
 }
 #endif /* CONFIG_OF_ADDRESS */
 
+#ifdef CONFIG_OF
+extern int of_address_to_resource(struct device_node *dev, int index,
+				  struct resource *r);
+void __iomem *of_iomap(struct device_node *node, int index);
+#else
+static inline int of_address_to_resource(struct device_node *dev, int index,
+					 struct resource *r)
+{
+	return -EINVAL;
+}
+
+static inline void __iomem *of_iomap(struct device_node *device, int index)
+{
+	return NULL;
+}
+#endif
 
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_PCI)
 extern const __be32 *of_get_pci_address(struct device_node *dev, int bar_no,
-- 
cgit v1.2.3


From d0dfa16a600190d142f7538e5909d13c35b60d98 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 16 Sep 2013 21:05:05 -0500
Subject: of: move of_translate_dma_address to of_address.h

of_translate_dma_address is implemented in common code, so move the
declaration there too.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
 include/linux/of_address.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index e8a179773a1a..5f6ed6b182b8 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -34,6 +34,10 @@ static inline void of_pci_range_to_resource(struct of_pci_range *range,
 	res->name = np->full_name;
 }
 
+/* Translate a DMA address from device space to CPU space */
+extern u64 of_translate_dma_address(struct device_node *dev,
+				    const __be32 *in_addr);
+
 #ifdef CONFIG_OF_ADDRESS
 extern u64 of_translate_address(struct device_node *np, const __be32 *addr);
 extern bool of_can_translate_address(struct device_node *dev);
-- 
cgit v1.2.3


From b5b4bb3f6a11f9c37b6d53138244f2ffe5bacd12 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sat, 7 Sep 2013 14:08:20 -0500
Subject: of: only include prom.h on sparc

The dependency on prom.h by the core DT code is now removed and only
sparc needs to include prom.h for the core code.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 4d294a0b8a57..54017b83650b 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -136,7 +136,9 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 	return of_read_number(cell, size);
 }
 
+#if defined(CONFIG_SPARC)
 #include <asm/prom.h>
+#endif
 
 /* Default #address and #size cells.  Allow arch asm/prom.h to override */
 #if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-- 
cgit v1.2.3


From 81fcfb813fe99c30f77dd3ed9a4e541d14a9ed01 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:39 +1000
Subject: hashtable: add hash_for_each_possible_rcu_notrace()

This adds hash_for_each_possible_rcu_notrace() which is basically
a notrace clone of hash_for_each_possible_rcu() which cannot be
used in real mode due to its tracing/debugging capability.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/hashtable.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index a9df51f5d54c..519b6e2d769e 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -173,6 +173,21 @@ static inline void hash_del_rcu(struct hlist_node *node)
 	hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\
 		member)
 
+/**
+ * hash_for_each_possible_rcu_notrace - iterate over all possible objects hashing
+ * to the same bucket in an rcu enabled hashtable
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ *
+ * This is the same as hash_for_each_possible_rcu() except that it does
+ * not do any RCU debugging or tracing.
+ */
+#define hash_for_each_possible_rcu_notrace(name, obj, member, key) \
+	hlist_for_each_entry_rcu_notrace(obj, \
+		&name[hash_min(key, HASH_BITS(name))], member)
+
 /**
  * hash_for_each_possible_safe - iterate over all possible objects hashing to the
  * same bucket safe against removals
-- 
cgit v1.2.3


From 8e0861fa3c4edfc2f30dd4cf4d58d3929f7c1b23 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 28 Aug 2013 18:37:42 +1000
Subject: powerpc: Prepare to support kernel handling of IOMMU map/unmap

The current VFIO-on-POWER implementation supports only user mode
driven mapping, i.e. QEMU is sending requests to map/unmap pages.
However this approach is really slow, so we want to move that to KVM.
Since H_PUT_TCE can be extremely performance sensitive (especially with
network adapters where each packet needs to be mapped/unmapped) we chose
to implement that as a "fast" hypercall directly in "real
mode" (processor still in the guest context but MMU off).

To be able to do that, we need to provide some facilities to
access the struct page count within that real mode environment as things
like the sparsemem vmemmap mappings aren't accessible.

This adds an API function realmode_pfn_to_page() to get page struct when
MMU is off.

This adds to MM a new function put_page_unless_one() which drops a page
if counter is bigger than 1. It is going to be used when MMU is off
(for example, real mode on PPC64) and we want to make sure that page
release will not happen in real mode as it may crash the kernel in
a horrible way.

CONFIG_SPARSEMEM_VMEMMAP and CONFIG_FLATMEM are supported.

Cc: linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/mm.h         | 14 ++++++++++++++
 include/linux/page-flags.h |  4 +++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55ee8855..1a0668e5a4ee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -297,12 +297,26 @@ static inline int put_page_testzero(struct page *page)
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
  */
 static inline int get_page_unless_zero(struct page *page)
 {
 	return atomic_inc_not_zero(&page->_count);
 }
 
+/*
+ * Try to drop a ref unless the page has a refcount of one, return false if
+ * that is the case.
+ * This is to make sure that the refcount won't become zero after this drop.
+ * This can be called when MMU is off so it must not access
+ * any of the virtual mappings.
+ */
+static inline int put_page_unless_one(struct page *page)
+{
+	return atomic_add_unless(&page->_count, -1, 1);
+}
+
 extern int page_is_ram(unsigned long pfn);
 
 /* Support for virtually mapped pages */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b54..98ada58f9942 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -329,7 +329,9 @@ static inline void set_page_writeback(struct page *page)
  * System with lots of page flags available. This allows separate
  * flags for PageHead() and PageTail() checks of compound pages so that bit
  * tests can be used in performance sensitive paths. PageCompound is
- * generally not used in hot code paths.
+ * generally not used in hot code paths except arch/powerpc/mm/init_64.c
+ * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages
+ * and avoid handling those in real mode.
  */
 __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
 __PAGEFLAG(Tail, tail)
-- 
cgit v1.2.3


From e277e656804c85a0729d4fd8cdd3c8ab3e6b3b86 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 11 Oct 2013 09:30:24 +0800
Subject: regulator: Remove max_uV from struct regulator_linear_range

Linear ranges mean that each range has linear voltage settings.
So we can calculate max_uV for each linear range in regulator core rather than
set the max_uV field in drivers.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 9bdad43ad228..997ff5c4d880 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -46,14 +46,12 @@ enum regulator_status {
  * regulator_list_linear_range().
  *
  * @min_uV:  Lowest voltage in range
- * @max_uV:  Highest voltage in range
  * @min_sel: Lowest selector for range
  * @max_sel: Highest selector for range
  * @uV_step: Step size
  */
 struct regulator_linear_range {
 	unsigned int min_uV;
-	unsigned int max_uV;
 	unsigned int min_sel;
 	unsigned int max_sel;
 	unsigned int uV_step;
-- 
cgit v1.2.3


From 8828bae464b129abed95b748263f1ab53bdc5755 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 11 Oct 2013 09:32:18 +0800
Subject: regulator: Add REGULATOR_LINEAR_RANGE macro

Add REGULATOR_LINEAR_RANGE macro and convert regulator drivers to use it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/driver.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 997ff5c4d880..edb11b716dd3 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -57,6 +57,15 @@ struct regulator_linear_range {
 	unsigned int uV_step;
 };
 
+/* Initialize struct regulator_linear_range */
+#define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV)	\
+{									\
+	.min_uV		= _min_uV,					\
+	.min_sel	= _min_sel,					\
+	.max_sel	= _max_sel,					\
+	.uV_step	= _step_uV,					\
+}
+
 /**
  * struct regulator_ops - regulator operations.
  *
-- 
cgit v1.2.3


From 2841a5fc375e9c573d10b82db30fa8a4cc25301c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 5 Oct 2013 00:23:12 +0100
Subject: spi: Provide per-message prepare and unprepare operations

Many SPI drivers perform setup and tear down on every message, usually
doing things like DMA mapping the message. Provide hooks for them to use
to provide such operations.

This is of limited value for drivers that implement transfer_one_message()
but will be of much greater utility with future factoring out of standard
implementations of that function.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/spi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 887116dbce2c..000b50bee6c0 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -257,6 +257,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @queue_lock: spinlock to syncronise access to message queue
  * @queue: message queue
  * @cur_msg: the currently in-flight message
+ * @cur_msg_prepared: spi_prepare_message was called for the currently
+ *                    in-flight message
  * @busy: message pump is busy
  * @running: message pump is running
  * @rt: whether this queue is set to run as a realtime task
@@ -274,6 +276,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @prepare_message: set up the controller to transfer a single message,
+ *                   for example doing DMA mapping.  Called from threaded
+ *                   context.
+ * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
@@ -388,11 +394,16 @@ struct spi_master {
 	bool				running;
 	bool				rt;
 	bool				auto_runtime_pm;
+	bool                            cur_msg_prepared;
 
 	int (*prepare_transfer_hardware)(struct spi_master *master);
 	int (*transfer_one_message)(struct spi_master *master,
 				    struct spi_message *mesg);
 	int (*unprepare_transfer_hardware)(struct spi_master *master);
+	int (*prepare_message)(struct spi_master *master,
+			       struct spi_message *message);
+	int (*unprepare_message)(struct spi_master *master,
+				 struct spi_message *message);
 
 	/* gpio chip select */
 	int			*cs_gpios;
-- 
cgit v1.2.3


From b158935f70b9c156903338053216dd0adf7ce31c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Sat, 5 Oct 2013 11:50:40 +0100
Subject: spi: Provide common spi_message processing loop

The loops which SPI controller drivers use to process the list of transfers
in a spi_message are typically very similar and have some error prone areas
such as the handling of /CS. Help simplify drivers by factoring this code
out into the core - if drivers provide a transfer_one() function instead
of a transfer_one_message() function the core will handle processing at the
message level.

/CS can be controlled by either setting cs_gpio or providing a set_cs
function. If this is not possible for hardware reasons then both can be
omitted and the driver should continue to implement manual /CS handling.

This is a first step in refactoring and it is expected that there will be
further enhancements, for example factoring out of the mapping of transfers
for DMA and the initiation and completion of interrupt driven transfers.

Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/spi/spi.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 000b50bee6c0..da371ab5ebeb 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -23,6 +23,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
+#include <linux/completion.h>
 
 /*
  * INTERFACES between SPI master-side drivers and SPI infrastructure.
@@ -150,8 +151,7 @@ static inline void *spi_get_drvdata(struct spi_device *spi)
 }
 
 struct spi_message;
-
-
+struct spi_transfer;
 
 /**
  * struct spi_driver - Host side "protocol" driver
@@ -259,6 +259,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @cur_msg: the currently in-flight message
  * @cur_msg_prepared: spi_prepare_message was called for the currently
  *                    in-flight message
+ * @xfer_completion: used by core transfer_one_message()
  * @busy: message pump is busy
  * @running: message pump is running
  * @rt: whether this queue is set to run as a realtime task
@@ -276,9 +277,15 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @set_cs: assert or deassert chip select, true to assert.  May be called
+ *          from interrupt context.
  * @prepare_message: set up the controller to transfer a single message,
  *                   for example doing DMA mapping.  Called from threaded
  *                   context.
+ * @transfer_one: transfer a single spi_transfer. When the
+ *	          driver is finished with this transfer it must call
+ *	          spi_finalize_current_transfer() so the subsystem can issue
+ *                the next transfer
  * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
@@ -395,6 +402,7 @@ struct spi_master {
 	bool				rt;
 	bool				auto_runtime_pm;
 	bool                            cur_msg_prepared;
+	struct completion               xfer_completion;
 
 	int (*prepare_transfer_hardware)(struct spi_master *master);
 	int (*transfer_one_message)(struct spi_master *master,
@@ -405,6 +413,14 @@ struct spi_master {
 	int (*unprepare_message)(struct spi_master *master,
 				 struct spi_message *message);
 
+	/*
+	 * These hooks are for drivers that use a generic implementation
+	 * of transfer_one_message() provided by the core.
+	 */
+	void (*set_cs)(struct spi_device *spi, bool enable);
+	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
+			    struct spi_transfer *transfer);
+
 	/* gpio chip select */
 	int			*cs_gpios;
 };
@@ -439,6 +455,7 @@ extern int spi_master_resume(struct spi_master *master);
 /* Calls the driver make to interact with the message queue */
 extern struct spi_message *spi_get_next_queued_message(struct spi_master *master);
 extern void spi_finalize_current_message(struct spi_master *master);
+extern void spi_finalize_current_transfer(struct spi_master *master);
 
 /* the spi driver core manages memory for the spi_master classdev */
 extern struct spi_master *
-- 
cgit v1.2.3


From 674d0ed8588c11ec9f70c8427ac83a73e0d156d5 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 13 Sep 2013 10:59:27 -0700
Subject: hwmon: (atxp1) Set and use error code from vid_to_reg()

vid_to_reg() returns -1 if it encounters an error. Return -EINVAL instead.
Its only caller, atxp1_storevcore(), doesn't use the return code but
returns -1 instead, which is wrong anyway as it means -EPERM.
Use the return value from vid_to_reg() instead to report the error.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/hwmon-vid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h
index f346e4d5381c..da0a680e2f6d 100644
--- a/include/linux/hwmon-vid.h
+++ b/include/linux/hwmon-vid.h
@@ -38,7 +38,7 @@ static inline int vid_to_reg(int val, u8 vrm)
 		return ((val >= 1100) && (val <= 1850) ?
 			((18499 - val * 10) / 25 + 5) / 10 : -1);
 	default:
-		return -1;
+		return -EINVAL;
 	}
 }
 
-- 
cgit v1.2.3


From bab2243ce1897865e31ea6d59b0478391f51812b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 6 Jul 2013 13:57:23 -0700
Subject: hwmon: Introduce hwmon_device_register_with_groups

hwmon_device_register_with_groups() lets callers register a hwmon device
together with all sysfs attributes in a single call.

When using hwmon_device_register_with_groups(), hwmon attributes are attached
to the hwmon device directly and no longer with its parent device.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index b2514f70d591..6d02ff77ae1a 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -15,8 +15,13 @@
 #define _HWMON_H_
 
 struct device;
+struct attribute_group;
 
 struct device *hwmon_device_register(struct device *dev);
+struct device *
+hwmon_device_register_with_groups(struct device *dev, const char *name,
+				  void *drvdata,
+				  const struct attribute_group **groups);
 
 void hwmon_device_unregister(struct device *dev);
 
-- 
cgit v1.2.3


From 74188cba088192e14cd7fd5433876e8c947bcdd8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 11 Jul 2013 20:00:12 -0700
Subject: hwmon: Provide managed hwmon registration

Drivers using the new hwmon_device_register_with_groups API often have a
remove function which consists solely of a call to hwmon_device_unregister().

Provide support for devm_hwmon_device_register_with_groups and
devm_hwmon_device_unregister to allow this repeated code to be removed
and help eliminate error handling code.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 6d02ff77ae1a..09354f6c1d63 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -22,7 +22,12 @@ struct device *
 hwmon_device_register_with_groups(struct device *dev, const char *name,
 				  void *drvdata,
 				  const struct attribute_group **groups);
+struct device *
+devm_hwmon_device_register_with_groups(struct device *dev, const char *name,
+				       void *drvdata,
+				       const struct attribute_group **groups);
 
 void hwmon_device_unregister(struct device *dev);
+void devm_hwmon_device_unregister(struct device *dev);
 
 #endif
-- 
cgit v1.2.3


From e33fabd365596178e72f62bb4b89f0aaad0509ad Mon Sep 17 00:00:00 2001
From: Anthony Olech <anthony.olech.opensource@diasemi.com>
Date: Fri, 11 Oct 2013 15:31:11 +0100
Subject: regmap: new API regmap_multi_reg_write() definition

New API regmap_multi_reg_write() is defined that allows a set of reg,val
pairs to be written to an I2C client device as one block transfer from the
point of view of a single I2C master system.

A simple demonstration implementation is included that just splits the
block write request into a sequence of single register writes.

The implementation will be modified later to support those I2C clients
that implement the alternative non-standard MULTIWRITE block write mode
so to achieve a single I2C transfer that will be atomic even in multiple
I2C master systems.

Signed-off-by: Anthony Olech <anthony.olech.opensource@diasemi.com>
Signed-off-by: David Dajun Chen <david.chen@diasemi.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..4b933a31f84f 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -378,6 +378,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			size_t val_count);
+int regmap_multi_reg_write(struct regmap *map, struct reg_default *regs,
+			int num_regs);
 int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 			   const void *val, size_t val_len);
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
-- 
cgit v1.2.3


From d1cb9d1af0bc11b7450a6032f43935c746609418 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Thu, 3 Oct 2013 17:24:51 -0400
Subject: of: Make cpu node handling more portable.

Use for_each_node_by_type() to iterate all cpu nodes in the
system.

Provide an overridable function arch_find_n_match_cpu_physical_id,
which sees if the given device node matches 'cpu' and if so sets
'*thread' when non-NULL to the cpu thread number within the core.

The default implementation behaves the same as the existing code.

Add a sparc64 implementation.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Sudeep KarkadaNagesha <Sudeep.KarkadaNagesha@arm.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/cpu.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 801ff9e73679..fbd25c3c2923 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -18,6 +18,7 @@
 #include <linux/cpumask.h>
 
 struct device;
+struct device_node;
 
 struct cpu {
 	int node_id;		/* The node which contains the CPU */
@@ -29,6 +30,8 @@ extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
 extern bool cpu_is_hotpluggable(unsigned cpu);
 extern bool arch_match_cpu_phys_id(int cpu, u64 phys_id);
+extern bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
+					      int cpu, unsigned int *thread);
 
 extern int cpu_add_dev_attr(struct device_attribute *attr);
 extern void cpu_remove_dev_attr(struct device_attribute *attr);
-- 
cgit v1.2.3


From d468bf9ecaabd3bf3a6134e5a369ced82b1d1ca1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 24 Sep 2013 11:54:38 +0200
Subject: gpio: add API to be strict about GPIO IRQ usage

It is currently often possible in many GPIO drivers to request
a GPIO line to be used as IRQ after calling gpio_to_irq() and,
as the gpiolib is not aware of this, set the same line to
output and start driving it, with undesired side effects.

As it is a bogus usage scenario to request a line flagged as
output to be used as IRQ, we introduce APIs to let gpiolib track
the use of a line as IRQ, and also set this flag from the
userspace ABI.

The API is symmetric so that lines can also be flagged from
.irq_enable() and unflagged from IRQ by .irq_disable().
The debugfs file is altered so that we see if a line is
reserved for IRQ.

Cc: Enric Balletbo i Serra <eballetbo@gmail.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Reviewed-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 552e3f46e4a3..a06ec3e85ba3 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -204,6 +204,18 @@ static inline int gpio_to_irq(unsigned gpio)
 	return -EINVAL;
 }
 
+static inline int gpio_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline void gpio_unlock_as_irq(struct gpio_chip *chip,
+				      unsigned int offset)
+{
+	WARN_ON(1);
+}
+
 static inline int irq_to_gpio(unsigned irq)
 {
 	/* irq can never have been returned from gpio_to_irq() */
-- 
cgit v1.2.3


From 8922915b38cd8b72f8e5af614b95be71d1d299d4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 7 Oct 2013 20:31:06 +0200
Subject: sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too

Commit 4c663cfc ("wait: fix false timeouts when using
wait_event_timeout()") introduced the additional condition checks
after a timeout but only in the "slow" __wait*() paths.

wait_event_timeout(wq, CONDITION, 0) still returns 0 if CONDITION
is already true and we do not call __wait*().

Now that we have ___wait_cond_timeout() we can use it instead to
ensure that __ret will be properly updated.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131007183106.GA10973@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a2726c7dd244..04c0260bda8f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -270,7 +270,7 @@ do {									\
 #define wait_event_timeout(wq, condition, timeout)			\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_timeout(wq, condition, timeout);	\
 	__ret;								\
 })
@@ -328,7 +328,7 @@ do {									\
 #define wait_event_interruptible_timeout(wq, condition, timeout)	\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_interruptible_timeout(wq,		\
 						condition, timeout);	\
 	__ret;								\
@@ -769,7 +769,7 @@ do {									\
 						  timeout)		\
 ({									\
 	long __ret = timeout;						\
-	if (!(condition))						\
+	if (!___wait_cond_timeout(condition))				\
 		__ret = __wait_event_interruptible_lock_irq_timeout(	\
 					wq, condition, lock, timeout);	\
 	__ret;								\
-- 
cgit v1.2.3


From c2d816443ef305aba8eaf0bf368f4d3d87494f06 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 7 Oct 2013 18:18:24 +0200
Subject: sched/wait: Introduce prepare_to_wait_event()

Add the new helper, prepare_to_wait_event() which should only be used
by ___wait_event().

prepare_to_wait_event() returns -ERESTARTSYS if signal_pending_state()
is true, otherwise it does prepare_to_wait/exclusive.  This allows to
uninline the signal-pending checks in wait_event*() macros.

Also, it can initialize wait->private/func. We do not care if they were
already initialized, the values are the same. This also shaves a couple
of insns from the inlined code.

This obviously makes prepare_*() path a little bit slower, but we are
likely going to sleep anyway, so I think it makes sense to shrink .text:

               text    data      bss      dec     hex  filename
            ===================================================
   before:  5126092 2959248 10117120 18202460 115bf5c   vmlinux
    after:  5124618 2955152 10117120 18196890 115a99a   vmlinux

on my build.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131007161824.GA29757@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 04c0260bda8f..ec099b03e11b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -187,27 +187,30 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 	__cond || !__ret;						\
 })
 
-#define ___wait_signal_pending(state)					\
-	((state == TASK_INTERRUPTIBLE && signal_pending(current)) ||	\
-	 (state == TASK_KILLABLE && fatal_signal_pending(current)))
+#define ___wait_is_interruptible(state)					\
+	(!__builtin_constant_p(state) ||				\
+		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
 ({									\
 	__label__ __out;						\
-	DEFINE_WAIT(__wait);						\
+	wait_queue_t __wait;						\
 	long __ret = ret;						\
 									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	if (exclusive)							\
+		__wait.flags = WQ_FLAG_EXCLUSIVE;			\
+	else								\
+		__wait.flags = 0;					\
+									\
 	for (;;) {							\
-		if (exclusive)						\
-			prepare_to_wait_exclusive(&wq, &__wait, state); \
-		else							\
-			prepare_to_wait(&wq, &__wait, state);		\
+		long __int = prepare_to_wait_event(&wq, &__wait, state);\
 									\
 		if (condition)						\
 			break;						\
 									\
-		if (___wait_signal_pending(state)) {			\
-			__ret = -ERESTARTSYS;				\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
 			if (exclusive) {				\
 				abort_exclusive_wait(&wq, &__wait,	\
 						     state, NULL);	\
@@ -791,6 +794,7 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long tim
  */
 void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
+long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-- 
cgit v1.2.3


From 586a87e6edc936d6d3c3585af504b33b9c3f0a06 Mon Sep 17 00:00:00 2001
From: Christian Ruppert <christian.ruppert@abilis.com>
Date: Tue, 15 Oct 2013 15:37:54 +0200
Subject: pinctrl/gpio: non-linear GPIO ranges accessible from gpiolib

This patch adds the infrastructure required to register non-linear gpio
ranges through gpiolib and the standard GPIO device tree bindings.

Signed-off-by: Christian Ruppert <christian.ruppert@abilis.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h            | 10 ++++++++++
 include/linux/pinctrl/pinctrl.h |  3 +++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 552e3f46e4a3..b8d0e53a802f 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -80,6 +80,7 @@ static inline int irq_to_gpio(unsigned int irq)
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/bug.h>
+#include <linux/pinctrl/pinctrl.h>
 
 struct device;
 struct gpio_chip;
@@ -220,6 +221,15 @@ gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 	return -EINVAL;
 }
 
+static inline int
+gpiochip_add_pingroup_range(struct gpio_chip *chip,
+			struct pinctrl_dev *pctldev,
+			unsigned int gpio_offset, const char *pin_group)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
 static inline void
 gpiochip_remove_pin_ranges(struct gpio_chip *chip)
 {
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 5979147d2bda..fefb88663975 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -144,6 +144,9 @@ extern struct pinctrl_dev *pinctrl_find_and_add_gpio_range(const char *devname,
 extern struct pinctrl_gpio_range *
 pinctrl_find_gpio_range_from_pin(struct pinctrl_dev *pctldev,
 				 unsigned int pin);
+extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
+				const char *pin_group, const unsigned **pins,
+				unsigned *num_pins);
 
 #ifdef CONFIG_OF
 extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np);
-- 
cgit v1.2.3


From 420f9739a62cdb027f5580d25c813501ff93aa6f Mon Sep 17 00:00:00 2001
From: David Henningsson <david.henningsson@canonical.com>
Date: Wed, 16 Oct 2013 23:10:31 +0200
Subject: thinkpad-acpi: Add mute and mic-mute LED functionality

The LEDs are currently not visible to userspace, for security
reasons. They are exported through thinkpad_acpi.h for use by the
snd-hda-intel driver.

Thanks to Alex Hung <alex.hung@canonical.com> and Takashi Iwai
<tiwai@suse.de> for writing parts of this patch.

Signed-off-by: David Henningsson <david.henningsson@canonical.com>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/thinkpad_acpi.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 include/linux/thinkpad_acpi.h

(limited to 'include/linux')

diff --git a/include/linux/thinkpad_acpi.h b/include/linux/thinkpad_acpi.h
new file mode 100644
index 000000000000..361de59a2285
--- /dev/null
+++ b/include/linux/thinkpad_acpi.h
@@ -0,0 +1,15 @@
+#ifndef __THINKPAD_ACPI_H__
+#define __THINKPAD_ACPI_H__
+
+/* These two functions return 0 if success, or negative error code
+   (e g -ENODEV if no led present) */
+
+enum {
+	TPACPI_LED_MUTE,
+	TPACPI_LED_MICMUTE,
+	TPACPI_LED_MAX,
+};
+
+int tpacpi_led_set(int whichled, bool on);
+
+#endif
-- 
cgit v1.2.3


From 40a96d54ee2232045783e657eb9224cd723dcb40 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Thu, 17 Oct 2013 15:35:36 -0700
Subject: intel_mid: Move platform device setups to their own
 platform_<device>.* files

As Intel is rolling out more SoCs after Moorestown, we need to
restructure the code in a way that is backward compatible and easy to
expand. This patch implements a flexible way to support multiple boards
and devices.

This patch does not add any new functional support. It just refactors
the existing code to increase modularity and decrease code
duplication when supporting multiple SoCs and boards.

Currently intel-mid.c has both board and SoC related code in one file.
This patch moves the board related code to new files and lets the linker
script create the SFI device table, as follows:

1. Move the SFI device specific code to
   arch/x86/platform/intel-mid/device-libs/platform_<device>.*
   A new device file is added for every supported device. This code will
   get conditionally compiled by using corresponding device driver
   CONFIG option.

2. Move the device_ids location to .x86_intel_mid_dev.init section by
   using new sfi_device() macro.

This patch was based on previous code from Sathyanarayanan Kuppuswamy.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: http://lkml.kernel.org/r/1382049336-21316-13-git-send-email-david.a.cohen@linux.intel.com
Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/sfi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sfi.h b/include/linux/sfi.h
index fe817918b30e..d9b436f09925 100644
--- a/include/linux/sfi.h
+++ b/include/linux/sfi.h
@@ -59,6 +59,9 @@
 #ifndef _LINUX_SFI_H
 #define _LINUX_SFI_H
 
+#include <linux/init.h>
+#include <linux/types.h>
+
 /* Table signatures reserved by the SFI specification */
 #define SFI_SIG_SYST		"SYST"
 #define SFI_SIG_FREQ		"FREQ"
-- 
cgit v1.2.3


From a06ccd9c3785fa5550917ae036944f4e080b5749 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Oct 2013 20:14:20 +0100
Subject: regulator: core: Add ability to create a lookup alias for supply

These patches add the ability to create an alternative device on which
a lookup for a certain supply should be conducted.

A common use-case for this would be devices that are logically
represented as a collection of drivers within Linux but are
presented as a single device from device tree. In this case it is
necessary for each sub device to locate their supply data on the main
device.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/consumer.h | 79 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 27be915caa96..e530681bea70 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -146,6 +146,32 @@ struct regulator *__must_check devm_regulator_get_optional(struct device *dev,
 void regulator_put(struct regulator *regulator);
 void devm_regulator_put(struct regulator *regulator);
 
+int regulator_register_supply_alias(struct device *dev, const char *id,
+				    struct device *alias_dev,
+				    const char *alias_id);
+void regulator_unregister_supply_alias(struct device *dev, const char *id);
+
+int regulator_bulk_register_supply_alias(struct device *dev, const char **id,
+					 struct device *alias_dev,
+					 const char **alias_id, int num_id);
+void regulator_bulk_unregister_supply_alias(struct device *dev,
+					    const char **id, int num_id);
+
+int devm_regulator_register_supply_alias(struct device *dev, const char *id,
+					 struct device *alias_dev,
+					 const char *alias_id);
+void devm_regulator_unregister_supply_alias(struct device *dev,
+					    const char *id);
+
+int devm_regulator_bulk_register_supply_alias(struct device *dev,
+					      const char **id,
+					      struct device *alias_dev,
+					      const char **alias_id,
+					      int num_id);
+void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
+						 const char **id,
+						 int num_id);
+
 /* regulator output control and status */
 int __must_check regulator_enable(struct regulator *regulator);
 int regulator_disable(struct regulator *regulator);
@@ -250,6 +276,59 @@ static inline void devm_regulator_put(struct regulator *regulator)
 {
 }
 
+static inline int regulator_register_supply_alias(struct device *dev,
+						  const char *id,
+						  struct device *alias_dev,
+						  const char *alias_id)
+{
+	return 0;
+}
+
+static inline void regulator_unregister_supply_alias(struct device *dev,
+						    const char *id)
+{
+}
+
+static inline int regulator_bulk_register_supply_alias(struct device *dev,
+						       const char **id,
+						       struct device *alias_dev,
+						       const char **alias_id,
+						       int num_id)
+{
+	return 0;
+}
+
+static inline void regulator_bulk_unregister_supply_alias(struct device *dev,
+							  const char **id,
+							  int num_id)
+{
+}
+
+static inline int devm_regulator_register_supply_alias(struct device *dev,
+						       const char *id,
+						       struct device *alias_dev,
+						       const char *alias_id)
+{
+	return 0;
+}
+
+static inline void devm_regulator_unregister_supply_alias(struct device *dev,
+							  const char *id)
+{
+}
+
+static inline int devm_regulator_bulk_register_supply_alias(
+		struct device *dev, const char **id, struct device *alias_dev,
+		const char **alias_id, int num_id)
+{
+	return 0;
+}
+
+static inline void devm_regulator_bulk_unregister_supply_alias(
+		struct device *dev, const char **id, int num_id)
+{
+}
+
 static inline int regulator_enable(struct regulator *regulator)
 {
 	return 0;
-- 
cgit v1.2.3


From 79a9becda8940deb2274b5aa4577c86d52ee7ecb Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:36 -0700
Subject: gpiolib: export descriptor-based GPIO interface

This patch exports the gpiod_* family of API functions, a safer
alternative to the legacy GPIO interface. Differences between the gpiod
and legacy gpio APIs are:

- gpio works with integers, whereas gpiod operates on opaque handlers
  which cannot be forged or used before proper acquisition
- gpiod get/set functions are aware of the active low state of a GPIO
- gpio consumers should now include <linux/gpio/consumer.h> to access
  the new interface, whereas chips drivers will use
  <linux/gpio/driver.h>

The legacy gpio API is now built as inline functions on top of gpiod.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h          |  11 +-
 include/linux/gpio/consumer.h | 238 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/gpio/driver.h   | 127 ++++++++++++++++++++++
 3 files changed, 372 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/gpio/consumer.h
 create mode 100644 include/linux/gpio/driver.h

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index a06ec3e85ba3..c691df044458 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -16,14 +16,17 @@
 #define GPIOF_OUT_INIT_LOW	(GPIOF_DIR_OUT | GPIOF_INIT_LOW)
 #define GPIOF_OUT_INIT_HIGH	(GPIOF_DIR_OUT | GPIOF_INIT_HIGH)
 
+/* Gpio pin is active-low */
+#define GPIOF_ACTIVE_LOW        (1 << 2)
+
 /* Gpio pin is open drain */
-#define GPIOF_OPEN_DRAIN	(1 << 2)
+#define GPIOF_OPEN_DRAIN	(1 << 3)
 
 /* Gpio pin is open source */
-#define GPIOF_OPEN_SOURCE	(1 << 3)
+#define GPIOF_OPEN_SOURCE	(1 << 4)
 
-#define GPIOF_EXPORT		(1 << 4)
-#define GPIOF_EXPORT_CHANGEABLE	(1 << 5)
+#define GPIOF_EXPORT		(1 << 5)
+#define GPIOF_EXPORT_CHANGEABLE	(1 << 6)
 #define GPIOF_EXPORT_DIR_FIXED	(GPIOF_EXPORT)
 #define GPIOF_EXPORT_DIR_CHANGEABLE (GPIOF_EXPORT | GPIOF_EXPORT_CHANGEABLE)
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
new file mode 100644
index 000000000000..2088eb50421c
--- /dev/null
+++ b/include/linux/gpio/consumer.h
@@ -0,0 +1,238 @@
+#ifndef __LINUX_GPIO_CONSUMER_H
+#define __LINUX_GPIO_CONSUMER_H
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_GPIOLIB
+
+struct device;
+struct gpio_chip;
+
+/**
+ * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are
+ * preferable to the old integer-based handles.
+ *
+ * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid
+ * until the GPIO is released.
+ */
+struct gpio_desc;
+
+int gpiod_get_direction(const struct gpio_desc *desc);
+int gpiod_direction_input(struct gpio_desc *desc);
+int gpiod_direction_output(struct gpio_desc *desc, int value);
+
+/* Value get/set from non-sleeping context */
+int gpiod_get_value(const struct gpio_desc *desc);
+void gpiod_set_value(struct gpio_desc *desc, int value);
+int gpiod_get_raw_value(const struct gpio_desc *desc);
+void gpiod_set_raw_value(struct gpio_desc *desc, int value);
+
+/* Value get/set from sleeping context */
+int gpiod_get_value_cansleep(const struct gpio_desc *desc);
+void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
+int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
+void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
+
+int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
+
+int gpiod_is_active_low(const struct gpio_desc *desc);
+int gpiod_cansleep(const struct gpio_desc *desc);
+
+int gpiod_to_irq(const struct gpio_desc *desc);
+
+/* Convert between the old gpio_ and new gpiod_ interfaces */
+struct gpio_desc *gpio_to_desc(unsigned gpio);
+int desc_to_gpio(const struct gpio_desc *desc);
+struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
+
+#else /* CONFIG_GPIOLIB */
+
+static inline struct gpio_desc *__must_check gpiod_get(struct device *dev,
+						       const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+							     const char *con_id,
+							     unsigned int idx)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void gpiod_put(struct gpio_desc *desc)
+{
+	might_sleep();
+
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
+							    const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline
+struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+						    const char *con_id,
+						    unsigned int idx)
+{
+	return ERR_PTR(-ENOSYS);
+}
+static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
+{
+	might_sleep();
+
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+
+static inline int gpiod_get_direction(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+static inline int gpiod_direction_input(struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+static inline int gpiod_direction_output(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+
+
+static inline int gpiod_get_value(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_value(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
+						int value)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -ENOSYS;
+}
+
+static inline int gpiod_is_active_low(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+static inline int gpiod_cansleep(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline int gpiod_to_irq(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline struct gpio_desc *gpio_to_desc(unsigned gpio)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline int desc_to_gpio(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return -EINVAL;
+}
+static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+	return ERR_PTR(-ENODEV);
+}
+
+
+#endif /* CONFIG_GPIOLIB */
+
+#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
+
+int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
+int gpiod_export_link(struct device *dev, const char *name,
+		      struct gpio_desc *desc);
+int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value);
+void gpiod_unexport(struct gpio_desc *desc);
+
+#else  /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
+
+static inline int gpiod_export(struct gpio_desc *desc,
+			       bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline int gpiod_export_link(struct device *dev, const char *name,
+				    struct gpio_desc *desc)
+{
+	return -ENOSYS;
+}
+
+static inline int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value)
+{
+	return -ENOSYS;
+}
+
+static inline void gpiod_unexport(struct gpio_desc *desc)
+{
+}
+
+#endif /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
+
+#endif
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
new file mode 100644
index 000000000000..5dc172c72f0f
--- /dev/null
+++ b/include/linux/gpio/driver.h
@@ -0,0 +1,127 @@
+#ifndef __LINUX_GPIO_DRIVER_H
+#define __LINUX_GPIO_DRIVER_H
+
+#include <linux/types.h>
+
+struct device;
+struct gpio_desc;
+
+/**
+ * struct gpio_chip - abstract a GPIO controller
+ * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
+ * @list: links gpio_chips together for traversal
+ * @request: optional hook for chip-specific activation, such as
+ *	enabling module power and clock; may sleep
+ * @free: optional hook for chip-specific deactivation, such as
+ *	disabling module power and clock; may sleep
+ * @get_direction: returns direction for signal "offset", 0=out, 1=in,
+ *	(same as GPIOF_DIR_XXX), or negative error
+ * @direction_input: configures signal "offset" as input, or returns error
+ * @direction_output: configures signal "offset" as output, or returns error
+ * @get: returns value for signal "offset"; for output signals this
+ *	returns either the value actually sensed, or zero
+ * @set: assigns output value for signal "offset"
+ * @set_debounce: optional hook for setting debounce time for specified gpio in
+ *      interrupt triggered gpio chips
+ * @to_irq: optional hook supporting non-static gpio_to_irq() mappings;
+ *	implementation may not sleep
+ * @dbg_show: optional routine to show contents in debugfs; default code
+ *	will be used when this is omitted, but custom code can show extra
+ *	state (such as pullup/pulldown configuration).
+ * @base: identifies the first GPIO number handled by this chip; or, if
+ *	negative during registration, requests dynamic ID allocation.
+ * @ngpio: the number of GPIOs handled by this controller; the last GPIO
+ *	handled is (base + ngpio - 1).
+ * @desc: array of ngpio descriptors. Private.
+ * @can_sleep: flag must be set iff get()/set() methods sleep, as they
+ *	must while accessing GPIO expander chips over I2C or SPI
+ * @names: if set, must be an array of strings to use as alternative
+ *      names for the GPIOs in this chip. Any entry in the array
+ *      may be NULL if there is no alias for the GPIO, however the
+ *      array must be @ngpio entries long.  A name can include a single printk
+ *      format specifier for an unsigned int.  It is substituted by the actual
+ *      number of the gpio.
+ *
+ * A gpio_chip can help platforms abstract various sources of GPIOs so
+ * they can all be accessed through a common programing interface.
+ * Example sources would be SOC controllers, FPGAs, multifunction
+ * chips, dedicated GPIO expanders, and so on.
+ *
+ * Each chip controls a number of signals, identified in method calls
+ * by "offset" values in the range 0..(@ngpio - 1).  When those signals
+ * are referenced through calls like gpio_get_value(gpio), the offset
+ * is calculated by subtracting @base from the gpio number.
+ */
+struct gpio_chip {
+	const char		*label;
+	struct device		*dev;
+	struct module		*owner;
+	struct list_head        list;
+
+	int			(*request)(struct gpio_chip *chip,
+						unsigned offset);
+	void			(*free)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*get_direction)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*direction_input)(struct gpio_chip *chip,
+						unsigned offset);
+	int			(*direction_output)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	int			(*get)(struct gpio_chip *chip,
+						unsigned offset);
+	void			(*set)(struct gpio_chip *chip,
+						unsigned offset, int value);
+	int			(*set_debounce)(struct gpio_chip *chip,
+						unsigned offset,
+						unsigned debounce);
+
+	int			(*to_irq)(struct gpio_chip *chip,
+						unsigned offset);
+
+	void			(*dbg_show)(struct seq_file *s,
+						struct gpio_chip *chip);
+	int			base;
+	u16			ngpio;
+	struct gpio_desc	*desc;
+	const char		*const *names;
+	unsigned		can_sleep:1;
+	unsigned		exported:1;
+
+#if defined(CONFIG_OF_GPIO)
+	/*
+	 * If CONFIG_OF is enabled, then all GPIO controllers described in the
+	 * device tree automatically may have an OF translation
+	 */
+	struct device_node *of_node;
+	int of_gpio_n_cells;
+	int (*of_xlate)(struct gpio_chip *gc,
+			const struct of_phandle_args *gpiospec, u32 *flags);
+#endif
+#ifdef CONFIG_PINCTRL
+	/*
+	 * If CONFIG_PINCTRL is enabled, then gpio controllers can optionally
+	 * describe the actual pin range which they serve in an SoC. This
+	 * information would be used by pinctrl subsystem to configure
+	 * corresponding pins for gpio usage.
+	 */
+	struct list_head pin_ranges;
+#endif
+};
+
+extern const char *gpiochip_is_requested(struct gpio_chip *chip,
+			unsigned offset);
+
+/* add/remove chips */
+extern int gpiochip_add(struct gpio_chip *chip);
+extern int __must_check gpiochip_remove(struct gpio_chip *chip);
+extern struct gpio_chip *gpiochip_find(void *data,
+			      int (*match)(struct gpio_chip *chip, void *data));
+
+/* lock/unlock as IRQ */
+int gpiod_lock_as_irq(struct gpio_desc *desc);
+void gpiod_unlock_as_irq(struct gpio_desc *desc);
+
+#endif
-- 
cgit v1.2.3


From af8b6375a8291fe2cf77707f3edec86b98a999cc Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:37 -0700
Subject: gpiolib: port of_ functions to use gpiod

Refactor the of_ functions of gpiolib to use the now public gpiod
interface, and export of_get_named_gpiod_flags() and
of_get_gpiod_flags() functions.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/of_gpio.h | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index a83dc6f5008e..d71f2cc141ae 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 
 struct device_node;
+struct gpio_desc;
 
 /*
  * This is Linux-specific flags. By default controllers' and Linux' mapping
@@ -47,7 +48,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 	return container_of(gc, struct of_mm_gpio_chip, gc);
 }
 
-extern int of_get_named_gpio_flags(struct device_node *np,
+extern struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
@@ -62,10 +63,10 @@ extern int of_gpio_simple_xlate(struct gpio_chip *gc,
 #else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline int of_get_named_gpio_flags(struct device_node *np,
+static inline struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags)
 {
-	return -ENOSYS;
+	return ERR_PTR(-ENOSYS);
 }
 
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
@@ -80,6 +81,18 @@ static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
 
 #endif /* CONFIG_OF_GPIO */
 
+static inline int of_get_named_gpio_flags(struct device_node *np,
+		const char *list_name, int index, enum of_gpio_flags *flags)
+{
+	struct gpio_desc *desc;
+	desc = of_get_named_gpiod_flags(np, list_name, index, flags);
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+	else
+		return desc_to_gpio(desc);
+}
+
 /**
  * of_gpio_named_count() - Count GPIOs for a device
  * @np:		device node to count GPIOs for
@@ -117,15 +130,21 @@ static inline int of_gpio_count(struct device_node *np)
 }
 
 /**
- * of_get_gpio_flags() - Get a GPIO number and flags to use with GPIO API
+ * of_get_gpiod_flags() - Get a GPIO descriptor and flags to use with GPIO API
  * @np:		device node to get GPIO from
  * @index:	index of the GPIO
  * @flags:	a flags pointer to fill in
  *
- * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
+ * Returns GPIO descriptor to use with Linux generic GPIO API, or a errno
  * value on the error condition. If @flags is not NULL the function also fills
  * in flags for the GPIO.
  */
+static inline struct gpio_desc *of_get_gpiod_flags(struct device_node *np,
+					int index, enum of_gpio_flags *flags)
+{
+	return of_get_named_gpiod_flags(np, "gpios", index, flags);
+}
+
 static inline int of_get_gpio_flags(struct device_node *np, int index,
 		      enum of_gpio_flags *flags)
 {
-- 
cgit v1.2.3


From bae48da237fcedd7ad09569025483b988635efb7 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 17 Oct 2013 10:21:38 -0700
Subject: gpiolib: add gpiod_get() and gpiod_put() functions

Add gpiod_get(), gpiod_get_index() and gpiod_put() functions that
provide safer management of GPIOs.

These functions put the GPIO framework in line with the conventions of
other frameworks in the kernel, and help ensure every GPIO is declared
properly and valid while it is used.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 15 ++++++++++++
 include/linux/gpio/driver.h   | 56 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2088eb50421c..4d34dbbbad4d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -18,6 +18,21 @@ struct gpio_chip;
  */
 struct gpio_desc;
 
+/* Acquire and dispose GPIOs */
+struct gpio_desc *__must_check gpiod_get(struct device *dev,
+					 const char *con_id);
+struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
+					       const char *con_id,
+					       unsigned int idx);
+void gpiod_put(struct gpio_desc *desc);
+
+struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
+					      const char *con_id);
+struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
+						    const char *con_id,
+						    unsigned int idx);
+void devm_gpiod_put(struct device *dev, struct gpio_desc *desc);
+
 int gpiod_get_direction(const struct gpio_desc *desc);
 int gpiod_direction_input(struct gpio_desc *desc);
 int gpiod_direction_output(struct gpio_desc *desc, int value);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5dc172c72f0f..cd9da3885d79 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -124,4 +124,60 @@ extern struct gpio_chip *gpiochip_find(void *data,
 int gpiod_lock_as_irq(struct gpio_desc *desc);
 void gpiod_unlock_as_irq(struct gpio_desc *desc);
 
+/**
+ * Lookup table for associating GPIOs to specific devices and functions using
+ * platform data.
+ */
+struct gpiod_lookup {
+	struct list_head list;
+	/*
+	 * name of the chip the GPIO belongs to
+	 */
+	const char *chip_label;
+	/*
+	 * hardware number (i.e. relative to the chip) of the GPIO
+	 */
+	u16 chip_hwnum;
+	/*
+	 * name of device that can claim this GPIO
+	 */
+	const char *dev_id;
+	/*
+	 * name of the GPIO from the device's point of view
+	 */
+	const char *con_id;
+	/*
+	 * index of the GPIO in case several GPIOs share the same name
+	 */
+	unsigned int idx;
+	/*
+	 * mask of GPIOF_* values
+	 */
+	unsigned long flags;
+};
+
+/*
+ * Simple definition of a single GPIO under a con_id
+ */
+#define GPIO_LOOKUP(_chip_label, _chip_hwnum, _dev_id, _con_id, _flags) \
+	GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _dev_id, _con_id, 0, _flags)
+
+/*
+ * Use this macro if you need to have several GPIOs under the same con_id.
+ * Each GPIO needs to use a different index and can be accessed using
+ * gpiod_get_index()
+ */
+#define GPIO_LOOKUP_IDX(_chip_label, _chip_hwnum, _dev_id, _con_id, _idx, \
+			_flags)                                           \
+{                                                                         \
+	.chip_label = _chip_label,                                        \
+	.chip_hwnum = _chip_hwnum,                                        \
+	.dev_id = _dev_id,                                                \
+	.con_id = _con_id,                                                \
+	.idx = _idx,                                                      \
+	.flags = _flags,                                                  \
+}
+
+void gpiod_add_table(struct gpiod_lookup *table, size_t size);
+
 #endif
-- 
cgit v1.2.3


From 936e15dd2128eb5aa71251766f1176552b45f43c Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 10 Oct 2013 11:01:08 +0300
Subject: gpiolib / ACPI: convert to gpiod interfaces

The new GPIO descriptor based interface is now preferred over the old
integer based one. This patch converts the ACPI GPIO helpers to use this
new interface internally. In addition to that provide compatibility
function acpi_get_gpio_by_index() that converts the returned GPIO
descriptor to an integer.

We also drop acpi_get_gpio() as it is not used anywhere outside
gpiolib-acpi and even there we use acpi_get_gpiod() instead.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/acpi_gpio.h | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index 4c120a1e0ca3..b6ce601e55a2 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -2,8 +2,10 @@
 #define _LINUX_ACPI_GPIO_H_
 
 #include <linux/device.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 
 /**
  * struct acpi_gpio_info - ACPI GPIO specific information
@@ -15,23 +17,18 @@ struct acpi_gpio_info {
 
 #ifdef CONFIG_GPIO_ACPI
 
-int acpi_get_gpio(char *path, int pin);
-int acpi_get_gpio_by_index(struct device *dev, int index,
-			   struct acpi_gpio_info *info);
+struct gpio_desc *acpi_get_gpiod_by_index(struct device *dev, int index,
+					  struct acpi_gpio_info *info);
 void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
 void acpi_gpiochip_free_interrupts(struct gpio_chip *chip);
 
 #else /* CONFIG_GPIO_ACPI */
 
-static inline int acpi_get_gpio(char *path, int pin)
+static inline struct gpio_desc *
+acpi_get_gpiod_by_index(struct device *dev, int index,
+			struct acpi_gpio_info *info)
 {
-	return -ENODEV;
-}
-
-static inline int acpi_get_gpio_by_index(struct device *dev, int index,
-					 struct acpi_gpio_info *info)
-{
-	return -ENODEV;
+	return ERR_PTR(-ENOSYS);
 }
 
 static inline void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
@@ -39,4 +36,14 @@ static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { }
 
 #endif /* CONFIG_GPIO_ACPI */
 
+static inline int acpi_get_gpio_by_index(struct device *dev, int index,
+					 struct acpi_gpio_info *info)
+{
+	struct gpio_desc *desc = acpi_get_gpiod_by_index(dev, index, info);
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+	return desc_to_gpio(desc);
+}
+
 #endif /* _LINUX_ACPI_GPIO_H_ */
-- 
cgit v1.2.3


From e01f440a689aeb2d0e81c696fe2069f8d01d5d49 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 10 Oct 2013 11:01:10 +0300
Subject: gpiolib / ACPI: allow passing GPIOF_ACTIVE_LOW for GpioInt resources

The ACPI GpioInt resources contain polarity field that is used to specify
whether the interrupt is active high or low. Since gpiolib supports
GPIOF_ACTIVE_LOW we can pass this information in the flags field in
acpi_find_gpio(), analogous to the DeviceTree version.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/acpi_gpio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index b6ce601e55a2..d875bc3dba3c 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -10,9 +10,11 @@
 /**
  * struct acpi_gpio_info - ACPI GPIO specific information
  * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
+ * @active_low: in case of @gpioint, the pin is active low
  */
 struct acpi_gpio_info {
 	bool gpioint;
+	bool active_low;
 };
 
 #ifdef CONFIG_GPIO_ACPI
-- 
cgit v1.2.3


From 88f074f4871a8c212b212b725e4dcdcdb09613c1 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:28:59 -0700
Subject: ACPI, CPER: Update cper info

We have a lot of confusing names of functions and data structures in
amongst the error reporting code.  In particular the "apei" prefix
has been applied to many objects that are not part of APEI.  Since we
will be using these routines for extended error log reporting it will
be clearer if we fix up the names first.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/cper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cper.h b/include/linux/cper.h
index c23049496531..09ebe2113641 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -389,6 +389,6 @@ struct cper_sec_pcie {
 
 u64 cper_next_record_id(void);
 void cper_print_bits(const char *prefix, unsigned int bits,
-		     const char *strs[], unsigned int strs_size);
+		     const char * const strs[], unsigned int strs_size);
 
 #endif
-- 
cgit v1.2.3


From 10ef6b0dffe404bcc54e94cb2ca1a5b18445a66b Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:29:07 -0700
Subject: bitops: Introduce a more generic BITMASK macro

GENMASK is used to create a contiguous bitmask ([hi:lo]). It is
implemented twice in the current kernel: once in the EDAC driver and
once in the SiS/XGI FB driver. Move it to a more generic place for
other usage.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/bitops.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3b6b82108b9..bd0c4598d03b 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -10,6 +10,14 @@
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #endif
 
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l)		(((U32_C(1) << ((h) - (l) + 1)) - 1) << (l))
+#define GENMASK_ULL(h, l)	(((U64_C(1) << ((h) - (l) + 1)) - 1) << (l))
+
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
 extern unsigned int __sw_hweight32(unsigned int w);
-- 
cgit v1.2.3


From 4023fe6ff2192d6050647571ea54f5497b2ec8f6 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Fri, 18 Oct 2013 18:37:43 +0300
Subject: ASoC: davinci-mcasp: Extract DMA channels directly from DT

Extract DMA channels directly from DT as they can not be found from
platform resources anymore. This is a work-around until davinci audio
driver is updated to use dmaengine.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/platform_data/davinci_asp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h
index 8db5ae03b6e3..689a856b86f9 100644
--- a/include/linux/platform_data/davinci_asp.h
+++ b/include/linux/platform_data/davinci_asp.h
@@ -84,6 +84,8 @@ struct snd_platform_data {
 	u8 version;
 	u8 txnumevt;
 	u8 rxnumevt;
+	int tx_dma_channel;
+	int rx_dma_channel;
 };
 
 enum {
-- 
cgit v1.2.3


From f65f0a1a9836abbfbe5c9b8fa0452e4d8eb7bf00 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 15 Sep 2013 03:50:17 -0700
Subject: leds: lp55xx: enable setting default trigger

This enables setting a default trigger on an LP55xx channel,
either from platform data or device tree. This mechanism is
identical to the mechanism for GPIO LEDs and references the
common LEDs device tree bindings.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Milo Kim <milo.kim@ti.com>
Acked-by: Milo Kim <milo.kim@ti.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/platform_data/leds-lp55xx.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
index 51a2ff579d60..c32de4dcec54 100644
--- a/include/linux/platform_data/leds-lp55xx.h
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -22,6 +22,7 @@
 
 struct lp55xx_led_config {
 	const char *name;
+	const char *default_trigger;
 	u8 chan_nr;
 	u8 led_current; /* mA x10, 0 if led is not connected */
 	u8 max_current;
-- 
cgit v1.2.3


From bb6febdc90efe7f664328075c204eed8e9af7ec9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= <maximilian.guentner@gmail.com>
Date: Wed, 16 Oct 2013 18:09:17 -0700
Subject: leds: Added driver for the NXP PCA9685 I2C chip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NXP PCA9685 supports 16 channels/leds using a 12-bit PWM (4095
levels of brightness).
This driver supports configuration using platform_data.

Signed-off-by: Maximilian Güntner <maximilian.guentner@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/platform_data/leds-pca9685.h | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 include/linux/platform_data/leds-pca9685.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h
new file mode 100644
index 000000000000..778e9e4249cc
--- /dev/null
+++ b/include/linux/platform_data/leds-pca9685.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * Based on leds-pca963x.h by Peter Meerwald <p.meerwald@bct-electronic.com>
+ *
+ * LED driver for the NXP PCA9685 PWM chip
+ *
+ */
+
+#ifndef __LINUX_PCA9685_H
+#define __LINUX_PCA9685_H
+
+#include <linux/leds.h>
+
+enum pca9685_outdrv {
+	PCA9685_OPEN_DRAIN,
+	PCA9685_TOTEM_POLE,
+};
+
+enum pca9685_inverted {
+	PCA9685_NOT_INVERTED,
+	PCA9685_INVERTED,
+};
+
+struct pca9685_platform_data {
+	struct led_platform_data leds;
+	enum pca9685_outdrv outdrv;
+	enum pca9685_inverted inverted;
+};
+
+#endif /* __LINUX_PCA9685_H */
-- 
cgit v1.2.3


From 7fcd427465e710d0c4e2737d2f02b2ffa14b9bb3 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 15 Oct 2013 20:14:21 +0100
Subject: mfd: Allow mapping regulator supplies to MFD device from children

Occasionally, it is useful to map supplies from a child device onto the
MFD device. A typical usecase for this would be if the MFD device is
represented as a single node in device tree. All supplies will be
defined in device tree as existing on the MFD device. When a child
depends on frameworks which might have no knowledge of MFD to lookup
supplies on its behalf the supply will not be found.

This patch adds a list of supplies that should be looked up on the
parent rather than the child as part of the mfd_cell structure.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/mfd/core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index cebe97ee98b8..7314fc4e6d25 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -59,6 +59,12 @@ struct mfd_cell {
 	 * pm_runtime_no_callbacks().
 	 */
 	bool			pm_runtime_no_callbacks;
+
+	/* A list of regulator supplies that should be mapped to the MFD
+	 * device rather than the child device when requested
+	 */
+	const char		**parent_supplies;
+	int			num_parent_supplies;
 };
 
 /*
-- 
cgit v1.2.3


From 92ec11809565cf6429c75204e99e0f583b5c9d7c Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Wed, 23 Oct 2013 13:40:55 +0200
Subject: sched/wait: Fix build breakage

The wait_event_interruptible_lock_irq() macro is missing a
semi-colon which causes a build failure in the i915 DRM driver.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1382528455-29911-1-git-send-email-treding@nvidia.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ec099b03e11b..3b23afa04d6b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -732,7 +732,7 @@ do {									\
 	int __ret = 0;							\
 	if (!(condition))						\
 		__ret = __wait_event_interruptible_lock_irq(wq,		\
-						condition, lock,)	\
+						condition, lock,);	\
 	__ret;								\
 })
 
-- 
cgit v1.2.3


From 4b3db708b114fc35ff1e0cd28a2bfb1490dbb5d3 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 21 Oct 2013 14:29:25 -0700
Subject: ACPI, x86: Extended error log driver for x86 platform

This H/W error log driver (a.k.a eMCA driver) is implemented based on
http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html

After errors are captured, more detailed platform specific information
can be obtained via this new enhanced H/W error log driver. Most notably
we can track memory errors back to the DIMM slot silk screen label.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a5db4aeefa36..c30bac8503bc 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -311,6 +311,7 @@ struct acpi_osc_context {
 #define OSC_INVALID_REVISION_ERROR	8
 #define OSC_CAPABILITIES_MASK_ERROR	16
 
+acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* platform-wide _OSC bits */
-- 
cgit v1.2.3


From dd6dad4288cb93e79bd7abfa6c6a338c47454d1a Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:29:25 -0700
Subject: DMI: Parse memory device (type 17) in SMBIOS

This patch adds a new interface to decode memory device (type 17)
to help error reporting on DIMMs.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/dmi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index b6eb7a05d58e..f820f0a336c9 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -99,6 +99,7 @@ extern const char * dmi_get_system_info(int field);
 extern const struct dmi_device * dmi_find_device(int type, const char *name,
 	const struct dmi_device *from);
 extern void dmi_scan_machine(void);
+extern void dmi_memdev_walk(void);
 extern void dmi_set_dump_stack_arch_desc(void);
 extern bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp);
 extern int dmi_name_in_vendors(const char *str);
@@ -107,6 +108,7 @@ extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	void *private_data);
 extern bool dmi_match(enum dmi_field f, const char *str);
+extern void dmi_memdev_name(u16 handle, const char **bank, const char **device);
 
 #else
 
@@ -115,6 +117,7 @@ static inline const char * dmi_get_system_info(int field) { return NULL; }
 static inline const struct dmi_device * dmi_find_device(int type, const char *name,
 	const struct dmi_device *from) { return NULL; }
 static inline void dmi_scan_machine(void) { return; }
+static inline void dmi_memdev_walk(void) { }
 static inline void dmi_set_dump_stack_arch_desc(void) { }
 static inline bool dmi_get_date(int field, int *yearp, int *monthp, int *dayp)
 {
@@ -133,6 +136,8 @@ static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
 	void *private_data) { return -1; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
 	{ return false; }
+static inline void dmi_memdev_name(u16 handle, const char **bank,
+		const char **device) { }
 static inline const struct dmi_system_id *
 	dmi_first_match(const struct dmi_system_id *list) { return NULL; }
 
-- 
cgit v1.2.3


From 147de14772ed897727dba7353916b02d1e0f17f4 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:30:13 -0700
Subject: ACPI, APEI, CPER: Add UEFI 2.4 support for memory error

In the latest UEFI spec (by now it is 2.4) the memory error definition
for CPER (UEFI 2.4 Appendix N Common Platform Error Record)
adds some new fields. These fields help people trace a
memory error to an actual DIMM location.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/cper.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cper.h b/include/linux/cper.h
index 09ebe2113641..2fc0ec3d89cc 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -218,8 +218,8 @@ enum {
 #define CPER_PROC_VALID_IP			0x1000
 
 #define CPER_MEM_VALID_ERROR_STATUS		0x0001
-#define CPER_MEM_VALID_PHYSICAL_ADDRESS		0x0002
-#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK	0x0004
+#define CPER_MEM_VALID_PA			0x0002
+#define CPER_MEM_VALID_PA_MASK			0x0004
 #define CPER_MEM_VALID_NODE			0x0008
 #define CPER_MEM_VALID_CARD			0x0010
 #define CPER_MEM_VALID_MODULE			0x0020
@@ -232,6 +232,9 @@ enum {
 #define CPER_MEM_VALID_RESPONDER_ID		0x1000
 #define CPER_MEM_VALID_TARGET_ID		0x2000
 #define CPER_MEM_VALID_ERROR_TYPE		0x4000
+#define CPER_MEM_VALID_RANK_NUMBER		0x8000
+#define CPER_MEM_VALID_CARD_HANDLE		0x10000
+#define CPER_MEM_VALID_MODULE_HANDLE		0x20000
 
 #define CPER_PCIE_VALID_PORT_TYPE		0x0001
 #define CPER_PCIE_VALID_VERSION			0x0002
@@ -347,6 +350,10 @@ struct cper_sec_mem_err {
 	__u64	responder_id;
 	__u64	target_id;
 	__u8	error_type;
+	__u8	reserved;
+	__u16	rank;
+	__u16	mem_array_handle;	/* card handle in UEFI 2.4 */
+	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
 };
 
 struct cper_sec_pcie {
-- 
cgit v1.2.3


From 56507694de3453076d73e0e9813349586ee67e59 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Fri, 18 Oct 2013 14:30:38 -0700
Subject: EDAC, GHES: Update ghes error record info

In the latest UEFI spec (by now it's 2.4) there are some new
fields for memory error reporting. Add these new fields to the
ghes_edac interface.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/edac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 5c6d7fbaf89e..dbdffe8d4469 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -51,7 +51,7 @@ static inline void opstate_init(void)
 #define EDAC_MC_LABEL_LEN	31
 
 /* Maximum size of the location string */
-#define LOCATION_SIZE 80
+#define LOCATION_SIZE 256
 
 /* Defines the maximum number of labels that can be reported */
 #define EDAC_MAX_LABELS		8
-- 
cgit v1.2.3


From 0c02c8007ea5554d028f99fd3e29fc201fdeeab3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 19 Sep 2013 11:22:36 -0500
Subject: of/irq: Rename of_irq_map_* functions to of_irq_parse_*

The OF irq handling code has been overloading the term 'map' to refer to
both parsing the data in the device tree and mapping it to the internal
linux irq system. This is probably because the device tree does have the
concept of an 'interrupt-map' function for translating interrupt
references from one node to another, but 'map' is still confusing when
the primary purpose of some of the functions is to parse the DT data.

This patch renames all the of_irq_map_* functions to of_irq_parse_*
which makes it clear that there is a difference between the parsing
phase and the mapping phase. Kernel code can make use of just the
parsing or just the mapping support as needed by the subsystem.

The patch was generated mechanically with a handful of sed commands.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of_irq.h | 8 ++++----
 include/linux/of_pci.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index fcd63baee5f2..a00bc71e62a3 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -35,12 +35,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_map_oldworld(struct device_node *device, int index,
+extern int of_irq_parse_oldworld(struct device_node *device, int index,
 			       struct of_irq *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_map_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 				      struct of_irq *out_irq)
 {
 	return -EINVAL;
@@ -48,10 +48,10 @@ static inline int of_irq_map_oldworld(struct device_node *device, int index,
 #endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 
 
-extern int of_irq_map_raw(struct device_node *parent, const __be32 *intspec,
+extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  u32 ointsize, const __be32 *addr,
 			  struct of_irq *out_irq);
-extern int of_irq_map_one(struct device_node *device, int index,
+extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_irq *out_irq);
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
 					  const u32 *intspec,
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index fd9c408631a0..839ba20808fe 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -6,7 +6,7 @@
 
 struct pci_dev;
 struct of_irq;
-int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From 530210c7814e83564c7ca7bca8192515042c0b63 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 16:39:11 +0100
Subject: of/irq: Replace of_irq with of_phandle_args

struct of_irq and struct of_phandle_args are exactly the same structure.
This patch makes the kernel use of_phandle_args everywhere. This in
itself isn't a big deal, but it makes some follow-on patches simpler.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of_irq.h | 24 ++++--------------------
 include/linux/of_pci.h |  4 ++--
 2 files changed, 6 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index a00bc71e62a3..8d9f85560d48 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -8,22 +8,6 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-/**
- * of_irq - container for device_node/irq_specifier pair for an irq controller
- * @controller: pointer to interrupt controller device tree node
- * @size: size of interrupt specifier
- * @specifier: array of cells @size long specifing the specific interrupt
- *
- * This structure is returned when an interrupt is mapped. The controller
- * field needs to be put() after use
- */
-#define OF_MAX_IRQ_SPEC		4 /* We handle specifiers of at most 4 cells */
-struct of_irq {
-	struct device_node *controller; /* Interrupt controller node */
-	u32 size; /* Specifier size */
-	u32 specifier[OF_MAX_IRQ_SPEC]; /* Specifier copy */
-};
-
 typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 
 /*
@@ -36,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
 extern int of_irq_parse_oldworld(struct device_node *device, int index,
-			       struct of_irq *out_irq);
+			       struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
 static inline int of_irq_parse_oldworld(struct device_node *device, int index,
-				      struct of_irq *out_irq)
+				      struct of_phandle_args *out_irq)
 {
 	return -EINVAL;
 }
@@ -50,9 +34,9 @@ static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 
 extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  u32 ointsize, const __be32 *addr,
-			  struct of_irq *out_irq);
+			  struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
-			  struct of_irq *out_irq);
+			  struct of_phandle_args *out_irq);
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
 					  const u32 *intspec,
 					  unsigned int intsize);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 839ba20808fe..f297237349e8 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -5,8 +5,8 @@
 #include <linux/msi.h>
 
 struct pci_dev;
-struct of_irq;
-int of_irq_parse_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
+struct of_phandle_args;
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From e6d30ab1e7d1281784672c0fc2ffa385cfb7279e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 16:55:53 +0100
Subject: of/irq: simplify args to irq_create_of_mapping

All the callers of irq_create_of_mapping() pass the contents of a struct
of_phandle_args structure to the function. Since all the callers already
have an of_phandle_args pointer, why not pass it directly to
irq_create_of_mapping()?

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of_irq.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 8d9f85560d48..3bbba8d6adc8 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,9 +37,7 @@ extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
 			  struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_phandle_args *out_irq);
-extern unsigned int irq_create_of_mapping(struct device_node *controller,
-					  const u32 *intspec,
-					  unsigned int intsize);
+extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
 			      struct resource *r);
 extern int of_irq_count(struct device_node *dev);
-- 
cgit v1.2.3


From 2361613206e66ce59cc0e08efa8d98ec15b84ed1 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Sun, 15 Sep 2013 22:32:39 +0100
Subject: of/irq: Refactor interrupt-map parsing

All the users of of_irq_parse_raw pass in a raw interrupt specifier from
the device tree and expect it to be returned (possibly modified) in an
of_phandle_args structure. However, the primary function of
of_irq_parse_raw() is to check for translations due to the presence of
one or more interrupt-map properties. The actual placing of the data
into an of_phandle_args structure is trivial. If it is refactored to
accept an of_phandle_args structure directly, then it becomes possible
to consume of_phandle_args from other sources. This is important for an
upcoming patch that allows a device to be connected to more than one
interrupt parent. It also simplifies the code a bit.

The biggest complication with this patch is that the old version works
on the interrupt specifiers in __be32 form, but the of_phandle_args
structure is intended to carry it in the cpu-native version. A bit of
churn was required to make this work. In the end it results in tighter
code, so the churn is worth it.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of_irq.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 3bbba8d6adc8..c0d6dfe80895 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -31,10 +31,7 @@ static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 }
 #endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 
-
-extern int of_irq_parse_raw(struct device_node *parent, const __be32 *intspec,
-			  u32 ointsize, const __be32 *addr,
-			  struct of_phandle_args *out_irq);
+extern int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq);
 extern int of_irq_parse_one(struct device_node *device, int index,
 			  struct of_phandle_args *out_irq);
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
-- 
cgit v1.2.3


From 624cfca534f9b1ffb1326617b4e973a3d5ecff4a Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 11 Oct 2013 22:05:10 +0100
Subject: of: Add helper for printing an of_phandle_args structure

It is sometimes useful for debug to get the contents of an
of_phandle_args structure out into the kernel log.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/of.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..374e03536135 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -275,6 +275,7 @@ extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
+extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args);
 extern struct device_node *of_parse_phandle(const struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-- 
cgit v1.2.3


From 16b84e5a505c790538e534ad8dfda9c288691e40 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 19 Sep 2013 16:44:55 -0500
Subject: of/irq: Create of_irq_parse_and_map_pci() to consolidate arch code.

Several architectures open code effectively the same code block for
finding and mapping PCI irqs. This patch consolidates it down to a
single function.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of_pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index f297237349e8..1a1f5ffd5288 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -7,6 +7,7 @@
 struct pci_dev;
 struct of_phandle_args;
 int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
-- 
cgit v1.2.3


From 954e04b9491adea99e4590bc73937fdd8774ab3c Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@gmail.com>
Date: Tue, 24 Sep 2013 10:38:26 -0700
Subject: of: introduce of_get_available_child_count

Some drivers keep counting their available children by themselves, so
introduce a new simple API like of_get_child_count() but for
available children.

Cc: Josh Wu <josh.wu@atmel.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..54c25606a997 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -226,6 +226,17 @@ static inline int of_get_child_count(const struct device_node *np)
 	return num;
 }
 
+static inline int of_get_available_child_count(const struct device_node *np)
+{
+	struct device_node *child;
+	int num = 0;
+
+	for_each_available_child_of_node(np, child)
+		num++;
+
+	return num;
+}
+
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 #define for_each_node_with_property(dn, prop_name) \
@@ -376,6 +387,11 @@ static inline int of_get_child_count(const struct device_node *np)
 	return 0;
 }
 
+static inline int of_get_available_child_count(const struct device_node *np)
+{
+	return 0;
+}
+
 static inline int of_device_is_compatible(const struct device_node *device,
 					  const char *name)
 {
-- 
cgit v1.2.3


From 30dae2f98612d7c8cd855861b9de205ebd9ef4fa Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@debian.org>
Date: Tue, 22 Oct 2013 11:02:56 -0700
Subject: leds: lp55xx: handle enable pin in driver

This patch moves the handling of the chip's enable pin from the board
code into the driver. It also updates all board-code files using the
driver to incorporate this change.

This is needed for device tree support of the enable pin.

Signed-off-by: Sebastian Reichel <sre@debian.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/platform_data/leds-lp55xx.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
index c32de4dcec54..624ff9edad6f 100644
--- a/include/linux/platform_data/leds-lp55xx.h
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -67,10 +67,8 @@ struct lp55xx_platform_data {
 	/* Clock configuration */
 	u8 clock_mode;
 
-	/* Platform specific functions */
-	int (*setup_resources)(void);
-	void (*release_resources)(void);
-	void (*enable)(bool state);
+	/* optional enable GPIO */
+	int enable_gpio;
 
 	/* Predefined pattern data */
 	struct lp55xx_predef_pattern *patterns;
-- 
cgit v1.2.3


From a01779f89fc8a2225cb82dca0fc7b8451851cb7b Mon Sep 17 00:00:00 2001
From: Josh Cartwright <joshc@codeaurora.org>
Date: Mon, 28 Oct 2013 13:12:35 -0500
Subject: regmap: add SPMI support

Add basic support for the System Power Management Interface (SPMI) bus.
This is a simple implementation which only implements register accesses
via the Extended Register Read/Write Long commands.

Signed-off-by: Josh Cartwright <joshc@codeaurora.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regmap.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a10380bfbeac..3f5abc86b6b5 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -23,6 +23,7 @@ struct device;
 struct i2c_client;
 struct irq_domain;
 struct spi_device;
+struct spmi_device;
 struct regmap;
 struct regmap_range_cfg;
 struct regmap_field;
@@ -318,6 +319,8 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
 			       const struct regmap_config *config);
 struct regmap *regmap_init_spi(struct spi_device *dev,
 			       const struct regmap_config *config);
+struct regmap *regmap_init_spmi(struct spmi_device *dev,
+			       const struct regmap_config *config);
 struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
 				    void __iomem *regs,
 				    const struct regmap_config *config);
@@ -330,6 +333,8 @@ struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c,
 				    const struct regmap_config *config);
 struct regmap *devm_regmap_init_spi(struct spi_device *dev,
 				    const struct regmap_config *config);
+struct regmap *devm_regmap_init_spmi(struct spmi_device *dev,
+				     const struct regmap_config *config);
 struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
 					 void __iomem *regs,
 					 const struct regmap_config *config);
-- 
cgit v1.2.3


From c2d3f25dda016d9697c5416810d4528770f0a281 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 9 Oct 2013 14:08:09 +0200
Subject: uprobes: Remove the wrong __weak attribute

linux/uprobes.h declares arch_uprobe_skip_sstep() as a weak function.
But as there is no generic definition, when trying to build uprobes
for an architecture that doesn't yet have an arch_uprobe_skip_sstep()
implementation, the vmlinux will try to call arch_uprobe_skip_sstep()
somewhere in Stupidhistan leading to a system crash.  We rather want a
proper link error so remove the __weak attribute from arch_uprobe_skip_sstep().

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 06f28beed7c2..e6fba627ea45 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -123,7 +123,7 @@ extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
-extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
-- 
cgit v1.2.3


From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 16 Oct 2013 19:39:37 +0200
Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK

uprobe_copy_process() does nothing if the child shares ->mm with
the forking process, but there is a special case: CLONE_VFORK.
In this case it would be more correct to do dup_utask() but avoid
dup_xol(). This is not that important, the child should not unwind
its stack too much, this can corrupt the parent's stack, but at
least we need this to allow to ret-probe __vfork() itself.

Note: in theory, it would be better to check task_pt_regs(p)->sp
instead of CLONE_VFORK, we need to dup_utask() if and only if the
child can return from the function called by the parent. But this
needs the arch-dependent helper, and I think that nobody actually
does clone(same_stack, CLONE_VM).

Reported-by: Martin Cermak <mcermak@redhat.com>
Reported-by: David Smith <dsmith@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index e6fba627ea45..9e0d5a6fe7a8 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -117,7 +117,7 @@ extern void uprobe_start_dup_mmap(void);
 extern void uprobe_end_dup_mmap(void);
 extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
-extern void uprobe_copy_process(struct task_struct *t);
+extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
@@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
 static inline void uprobe_free_utask(struct task_struct *t)
 {
 }
-static inline void uprobe_copy_process(struct task_struct *t)
+static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 {
 }
 static inline void uprobe_clear_state(struct mm_struct *mm)
-- 
cgit v1.2.3


From 403c1d0be5ccbd750d25c59d8358843a81e52e3b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 25 Oct 2013 12:59:05 +0200
Subject: gpio: provide stubs for devres gpio functions

commit 6b3d8145dcfdbbb43f13544e16f44f4574f941dd
"gpiolib: make GPIO_DEVRES depend on GPIOLIB"
breaks builds when device drivers are using devm_gpio*
devres functions without enabling GPIOLIB, relying on
the devres code to be compiled anyway.

Provide stubs so that we get these if we're using the
devres functions without GPIOLIB.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index c691df044458..0c56b9e9c209 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -77,6 +77,15 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* ! CONFIG_ARCH_HAVE_CUSTOM_GPIO_H */
 
+/* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */
+
+struct device;
+
+int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
+int devm_gpio_request_one(struct device *dev, unsigned gpio,
+			  unsigned long flags, const char *label);
+void devm_gpio_free(struct device *dev, unsigned int gpio);
+
 #else /* ! CONFIG_GPIOLIB */
 
 #include <linux/kernel.h>
@@ -241,14 +250,25 @@ gpiochip_remove_pin_ranges(struct gpio_chip *chip)
 	WARN_ON(1);
 }
 
-#endif /* ! CONFIG_GPIOLIB */
+static inline int devm_gpio_request(struct device *dev, unsigned gpio,
+				    const char *label)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
 
-struct device;
+static inline int devm_gpio_request_one(struct device *dev, unsigned gpio,
+					unsigned long flags, const char *label)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
 
-/* bindings for managed devices that want to request gpios */
-int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
-int devm_gpio_request_one(struct device *dev, unsigned gpio,
-			  unsigned long flags, const char *label);
-void devm_gpio_free(struct device *dev, unsigned int gpio);
+static inline void devm_gpio_free(struct device *dev, unsigned int gpio)
+{
+	WARN_ON(1);
+}
+
+#endif /* ! CONFIG_GPIOLIB */
 
 #endif /* __LINUX_GPIO_H */
-- 
cgit v1.2.3


From 335d7a7d63aa3a6da4d4903ef6e64de4a88f27da Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 29 Oct 2013 20:10:13 +1100
Subject: gpiolib: include gpio/consumer.h in of_gpio.h for desc_to_gpio()

Fixes this build error on sparc:

In file included from drivers/spi/spi.c:33:0:
include/linux/of_gpio.h: In function 'of_get_named_gpio_flags':
include/linux/of_gpio.h:93:3: error: implicit declaration of function 'desc_to_gpio' [-Werror=implicit-function-declaration]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/of_gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index d71f2cc141ae..f14123a5a9df 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -19,9 +19,9 @@
 #include <linux/errno.h>
 #include <linux/gpio.h>
 #include <linux/of.h>
+#include <linux/gpio/consumer.h>
 
 struct device_node;
-struct gpio_desc;
 
 /*
  * This is Linux-specific flags. By default controllers' and Linux' mapping
-- 
cgit v1.2.3


From f3ed0b66482fa2a0403280174a998487e9054867 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 29 Oct 2013 01:06:23 +1100
Subject: gpiolib: provide a declaration of seq_file in gpio/driver.h

Fixes this build error:

In file included from include/asm-generic/gpio.h:13:0,
                 from include/linux/gpio.h:51,
                 from include/linux/of_gpio.h:20,
                 from arch/powerpc/sysdev/ppc4xx_gpio.c:29:
include/linux/gpio/driver.h:85:14: error: 'struct seq_file' declared inside parameter list [-Werror]
include/linux/gpio/driver.h:85:14: error: its scope is only this definition or declaration, which is probably not what you want [-Werror]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index cd9da3885d79..656a27efb2c8 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -5,6 +5,7 @@
 
 struct device;
 struct gpio_desc;
+struct seq_file;
 
 /**
  * struct gpio_chip - abstract a GPIO controller
-- 
cgit v1.2.3


From a3e31b4588443f37d82195096c6b30dff1c152c2 Mon Sep 17 00:00:00 2001
From: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Date: Wed, 18 Sep 2013 11:53:05 +0100
Subject: of: Move definition of of_find_next_cache_node into common code.

Since the definition of of_find_next_cache_node is architecture independent,
the existing definition in powerpc can be moved to drivers/of/base.c.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/of.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f95aee391e30..c08c07e249b3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -226,6 +226,8 @@ static inline int of_get_child_count(const struct device_node *np)
 	return num;
 }
 
+/* cache lookup */
+extern struct device_node *of_find_next_cache_node(const struct device_node *);
 extern struct device_node *of_find_node_with_property(
 	struct device_node *from, const char *prop_name);
 #define for_each_node_with_property(dn, prop_name) \
-- 
cgit v1.2.3


From 7d716456a0ee4e9bd63be9234f886d20382ac950 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 31 Oct 2013 12:48:14 +0100
Subject: sched/wait: Fix __wait_event_interruptible_lock_irq_timeout()

__wait_event_interruptible_lock_irq_timeout() needs the timeout
parameter passed instead of "ret".

This magically compiled since the only user has a local ret
variable. Luckily we got a build warning:

  CC      drivers/s390/scsi/zfcp_qdio.o
  drivers/s390/scsi/zfcp_qdio.c: In function 'zfcp_qdio_sbal_get':
  include/linux/wait.h:780:15: warning: 'ret' may be used uninitialized

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20131031114814.GB5551@osiris
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3b23afa04d6b..61939ba30aa0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -739,7 +739,7 @@ do {									\
 #define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
 						    lock, timeout)	\
 	___wait_event(wq, ___wait_cond_timeout(condition),		\
-		      TASK_INTERRUPTIBLE, 0, ret,			\
+		      TASK_INTERRUPTIBLE, 0, timeout,			\
 		      spin_unlock_irq(&lock);				\
 		      __ret = schedule_timeout(__ret);			\
 		      spin_lock_irq(&lock));
-- 
cgit v1.2.3


From f6f0747e5bc69401d7f90313aa1b46709d27840a Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 28 Oct 2013 13:12:35 -0500
Subject: of: Add empty for_each_available_child_of_node() macro definition

Add this empty macro definition so users can be compiled without
excluding this macro call with preprocessor directives when CONFIG_OF
is disabled.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 54017b83650b..b97f685c941f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -366,6 +366,9 @@ static inline bool of_have_populated_dt(void)
 #define for_each_child_of_node(parent, child) \
 	while (0)
 
+#define for_each_available_child_of_node(parent, child) \
+	while (0)
+
 static inline struct device_node *of_get_child_by_name(
 					const struct device_node *node,
 					const char *name)
-- 
cgit v1.2.3


From b8a216269ec0ce2e961d32e6d640d7010b8a818e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 4 Oct 2013 22:06:53 +0200
Subject: sched: Move completion code from core.c to completion.c

Completions already have their own header file: linux/completion.h
Move the implementation out of kernel/sched/core.c and into its own
file: kernel/sched/completion.c.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-x2y49rmxu5dljt66ai2lcfuw@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/completion.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 3cd574d5b19e..22c33e35bcb2 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -5,7 +5,7 @@
  * (C) Copyright 2001 Linus Torvalds
  *
  * Atomic wait-for-completion handler data structures.
- * See kernel/sched/core.c for details.
+ * See kernel/sched/completion.c for details.
  */
 
 #include <linux/wait.h>
-- 
cgit v1.2.3


From 3820b4d2789f5166afdb136bb14f93166e6cfbc2 Mon Sep 17 00:00:00 2001
From: "David A. Long" <dave.long@linaro.org>
Date: Tue, 15 Oct 2013 17:04:16 -0400
Subject: uprobes: Move function declarations out of arch

Move the function declarations from the arch headers to the common
header, since only the function bodies are architecture-specific.
These changes are from Rabin Vincent's uprobes patch.

[ oleg: update arch/powerpc/include/asm/uprobes.h ]

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: David A. Long <dave.long@linaro.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 9e0d5a6fe7a8..28473e3f6068 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -30,6 +30,7 @@
 struct vm_area_struct;
 struct mm_struct;
 struct inode;
+struct notifier_block;
 
 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
 # include <asm/uprobes.h>
@@ -125,6 +126,13 @@ extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
-- 
cgit v1.2.3


From f72d41fa902fb19a9b63028202a400b0ce497491 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 5 Nov 2013 19:50:39 +0100
Subject: uprobes: Export write_opcode() as uprobe_write_opcode()

set_swbp() and set_orig_insn() are __weak, but this is pointless
because write_opcode() is static.

Export write_opcode() as uprobe_write_opcode() for the upcoming
arm port, this way it can actually override set_swbp() and use
__opcode_to_mem_arm(bpinsn) instead of UPROBE_SWBP_INSN.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 28473e3f6068..319eae70fe84 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -109,6 +109,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
+extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
-- 
cgit v1.2.3


From 5702941eec32cfd7b8cf9e36a0936e48170011a4 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 5 Jul 2013 00:31:36 +0800
Subject: irqchip: bcm2835: Convert to use IRQCHIP_DECLARE macro

This patch converts irq-bcm2835 driver to use the new IRQCHIP_DECLARE and
irqchip_init.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
Cc: Simon Arlott <simon@fire.lp0.eu>
Cc: Olof Johansson <olof@lixom.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-rpi-kernel@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqchip/bcm2835.h | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 include/linux/irqchip/bcm2835.h

(limited to 'include/linux')

diff --git a/include/linux/irqchip/bcm2835.h b/include/linux/irqchip/bcm2835.h
deleted file mode 100644
index 48a859bc9dca..000000000000
--- a/include/linux/irqchip/bcm2835.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2010 Broadcom
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef __LINUX_IRQCHIP_BCM2835_H_
-#define __LINUX_IRQCHIP_BCM2835_H_
-
-#include <asm/exception.h>
-
-extern void bcm2835_init_irq(void);
-
-extern asmlinkage void __exception_irq_entry bcm2835_handle_irq(
-	struct pt_regs *regs);
-
-#endif
-- 
cgit v1.2.3


From a8d3f362f52b65207cacbfb4c50f75e9d4751ef6 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Thu, 7 Nov 2013 12:13:27 -0600
Subject: dt/irq: add empty of_irq_count for !OF_IRQ

Add an empty version of of_irq_count for !OF_IRQ. This fixes build error
on sparc in linux-next:

drivers/gpio/gpio-bcm-kona.c:542: undefined reference to `of_irq_count'

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 include/linux/of_irq.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index c0d6dfe80895..3f23b4472c31 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,12 +37,20 @@ extern int of_irq_parse_one(struct device_node *device, int index,
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
 			      struct resource *r);
-extern int of_irq_count(struct device_node *dev);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
 
 extern void of_irq_init(const struct of_device_id *matches);
 
+#ifdef CONFIG_OF_IRQ
+extern int of_irq_count(struct device_node *dev);
+#else
+static inline int of_irq_count(struct device_node *dev)
+{
+	return 0;
+}
+#endif
+
 #if defined(CONFIG_OF)
 /*
  * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
-- 
cgit v1.2.3