From a69d82b9bdf1e53e94423048e8bda8c5f5a3dd4e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 7 Oct 2014 17:47:36 +0200 Subject: power_supply: Add no_thermal property to prevent recursive get_temp calls Add a 'no_thermal' property to the power supply class. If true then thermal zone won't be created for this power supply in power_supply_register(). Power supply drivers may want to set it if they support POWER_SUPPLY_PROP_TEMP and they are forwarding this get property call to other thermal zone. If they won't set it lockdep may report false positive deadlock for thermal zone's mutex because of nested calls to thermal_zone_get_temp(). First is the call to thermal_zone_get_temp() of the driver's thermal zone. Thermal core gets POWER_SUPPLY_PROP_TEMP property from this driver. The driver then calls other thermal zone thermal_zone_get_temp() and returns result. Example of such driver is charger manager. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 3ed049673022..096dbced02ac 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -200,6 +200,12 @@ struct power_supply { void (*external_power_changed)(struct power_supply *psy); void (*set_charged)(struct power_supply *psy); + /* + * Set if thermal zone should not be created for this power supply. + * For example for virtual supplies forwarding calls to actual + * sensors or other supplies. + */ + bool no_thermal; /* For APM emulation, think legacy userspace. */ int use_for_apm; -- cgit v1.2.3 From bdbe81445407644492b9ac69a24d35e3202d773b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 13 Oct 2014 15:34:30 +0200 Subject: power: charger-manager: Fix accessing invalidated power supply after fuel gauge unbind The charger manager obtained reference to fuel gauge power supply in probe with power_supply_get_by_name() for later usage. However if fuel gauge driver was removed and re-added then this reference would point to old power supply (from driver which was removed). This lead to accessing old (and probably invalid) memory which could be observed with: $ echo "12-0036" > /sys/bus/i2c/drivers/max17042/unbind $ echo "12-0036" > /sys/bus/i2c/drivers/max17042/bind $ cat /sys/devices/virtual/power_supply/battery/capacity [ 240.480084] INFO: task cat:1393 blocked for more than 120 seconds. [ 240.484799] Not tainted 3.17.0-next-20141007-00028-ge60b6dd79570 #203 [ 240.491782] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 240.499589] cat D c0469530 0 1393 1 0x00000000 [ 240.505947] [] (__schedule) from [] (schedule_preempt_disabled+0x14/0x20) [ 240.514449] [] (schedule_preempt_disabled) from [] (mutex_lock_nested+0x1bc/0x458) [ 240.523736] [] (mutex_lock_nested) from [] (regmap_read+0x30/0x60) [ 240.531647] [] (regmap_read) from [] (max17042_get_property+0x2e8/0x350) [ 240.540055] [] (max17042_get_property) from [] (charger_get_property+0x264/0x348) [ 240.549252] [] (charger_get_property) from [] (power_supply_show_property+0x48/0x1e0) [ 240.558808] [] (power_supply_show_property) from [] (dev_attr_show+0x1c/0x48) [ 240.567664] [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x84/0x104) [ 240.575814] [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x24/0x28) [ 240.584061] [] (kernfs_seq_show) from [] (seq_read+0x1b0/0x484) [ 240.591702] [] (seq_read) from [] (vfs_read+0x88/0x144) [ 240.598640] [] (vfs_read) from [] (SyS_read+0x40/0x8c) [ 240.605507] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x48) [ 240.612952] 4 locks held by cat/1393: [ 240.616589] #0: (&p->lock){+.+.+.}, at: [] seq_read+0x30/0x484 [ 240.623414] #1: (&of->mutex){+.+.+.}, at: [] kernfs_seq_start+0x1c/0x8c [ 240.631086] #2: (s_active#31){++++.+}, at: [] kernfs_seq_start+0x24/0x8c [ 240.638777] #3: (&map->mutex){+.+...}, at: [] regmap_read+0x30/0x60 The charger-manager should get reference to fuel gauge power supply on each use of get_property callback. The thermal zone 'tzd' field of power supply should not be used because of the same reason. Additionally this change solves also the issue with nested thermal_zone_get_temp() calls and related false lockdep positive for deadlock for thermal zone's mutex [1]. When fuel gauge is used as source of temperature then the charger manager forwards its get_temp calls to fuel gauge thermal zone. So actually different mutexes are used (one for charger manager thermal zone and second for fuel gauge thermal zone) but for lockdep this is one class of mutex. The recursion is removed by retrieving temperature through power supply's get_property(). In case external thermal zone is used ('cm-thermal-zone' property is present in DTS) the recursion does not exist. Charger manager simply exports POWER_SUPPLY_PROP_TEMP_AMBIENT property (instead of POWER_SUPPLY_PROP_TEMP) thus no thermal zone is created for this power supply. [1] https://lkml.org/lkml/2014/10/6/309 Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver") Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 07e7945a1ff2..5d90d329e0ba 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -253,7 +253,6 @@ struct charger_manager { struct device *dev; struct charger_desc *desc; - struct power_supply *fuel_gauge; struct power_supply **charger_stat; #ifdef CONFIG_THERMAL -- cgit v1.2.3 From cdaf3e15385d3232b52287e50692506f8fd01a09 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 13 Oct 2014 15:34:31 +0200 Subject: power: charger-manager: Fix accessing invalidated power supply after charger unbind The charger manager obtained in probe references to power supplies for all chargers with power_supply_get_by_name() for later usage. However if such charger driver was removed then this reference would point to old power supply (from driver which was removed). This lead to accessing invalid memory which could be observed with: $ echo "max77693-charger" > /sys/bus/platform/drivers/max77693-charger/unbind $ grep . /sys/devices/virtual/power_supply/battery/charger.0/* $ grep . /sys/devices/virtual/power_supply/battery/* [ 15.339817] Unable to handle kernel paging request at virtual address 0001c12c [ 15.346187] pgd = edd08000 [ 15.348814] [0001c12c] *pgd=6dce2831, *pte=00000000, *ppte=00000000 [ 15.355075] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 15.360967] Modules linked in: [ 15.364010] CPU: 2 PID: 1388 Comm: grep Not tainted 3.17.0-next-20141007-00027-ga95e761db1b0 #245 [ 15.372859] task: ee03ad00 ti: edcf6000 task.ti: edcf6000 [ 15.378241] PC is at 0x1c12c [ 15.381113] LR is at is_ext_pwr_online+0x30/0x6c [ 15.385706] pc : [<0001c12c>] lr : [] psr: a0000013 [ 15.385706] sp : edcf7e88 ip : 00000000 fp : 00000000 [ 15.397161] r10: eeb02c08 r9 : c04b1f84 r8 : eeb02c00 [ 15.402369] r7 : edc69a10 r6 : eea6ac10 r5 : eea6ac10 r4 : 00000004 [ 15.408878] r3 : 0001c12c r2 : edcf7e8c r1 : 00000004 r0 : ee914418 [ 15.415390] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 15.422506] Control: 10c5387d Table: 6dd0804a DAC: 00000015 [ 15.428236] Process grep (pid: 1388, stack limit = 0xedcf6240) [ 15.434050] Stack: (0xedcf7e88 to 0xedcf8000) [ 15.438395] 7e80: ee03ad00 00000000 edcf7f80 eea6aca8 edcf7ec4 c033b7b0 [ 15.446554] 7ea0: 00000001 ee1cc3f0 00000004 c06e1e44 eebdc000 c06e1e44 eeb02c00 c0337144 [ 15.454713] 7ec0: ee2dac68 c005cffc ee1cc3c0 c06e1e44 00000fff 00001000 eebdc000 c0278ca8 [ 15.462872] 7ee0: c0278c8c ee1cc3c0 eeb7ce00 c014422c edcf7f20 00008000 ee1cc3c0 ee9a48c0 [ 15.471030] 7f00: 00000001 00000001 edcf7f80 c0142d94 c0142d70 c01060f4 00021000 ee1cc3f0 [ 15.479190] 7f20: 00000000 00000000 c06a2150 eebdc000 2e7ec000 ee9a48c0 00008000 00021000 [ 15.487349] 7f40: edcf7f80 00008000 edcf6000 00021000 00021000 c00e39a4 00000000 ee9a48c0 [ 15.495508] 7f60: 00004000 00000000 00000000 ee9a48c0 ee9a48c0 00008000 00021000 c00e3aa0 [ 15.503668] 7f80: 00000000 00000000 0001f2e0 0001f2e0 00021000 00001000 00000003 c000f364 [ 15.511826] 7fa0: 00000000 c000f1a0 0001f2e0 00021000 00000003 00021000 00008000 00000000 [ 15.519986] 7fc0: 0001f2e0 00021000 00001000 00000003 00000001 000205e8 00000000 00021000 [ 15.528145] 7fe0: 00008000 bebbe910 0000a7ad b6edc49c 60000010 00000003 aaaaaaaa aaaaaaaa [ 15.536320] [] (is_ext_pwr_online) from [] (charger_get_property+0x170/0x314) [ 15.545164] [] (charger_get_property) from [] (power_supply_show_property+0x48/0x20c) [ 15.554719] [] (power_supply_show_property) from [] (dev_attr_show+0x1c/0x48) [ 15.563577] [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x84/0x104) [ 15.571725] [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x24/0x28) [ 15.579973] [] (kernfs_seq_show) from [] (seq_read+0x1b0/0x484) [ 15.587614] [] (seq_read) from [] (vfs_read+0x88/0x144) [ 15.594552] [] (vfs_read) from [] (SyS_read+0x40/0x8c) [ 15.601417] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x48) [ 15.608877] Code: bad PC value [ 15.611991] ---[ end trace a88fcc95208db283 ]--- The charger-manager should get reference to charger power supply on each use of get_property callback. Signed-off-by: Krzysztof Kozlowski Cc: Fixes: 3bb3dbbd56ea ("power_supply: Add initial Charger-Manager driver") Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 5d90d329e0ba..e97fc656a058 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -253,8 +253,6 @@ struct charger_manager { struct device *dev; struct charger_desc *desc; - struct power_supply **charger_stat; - #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd_batt; #endif -- cgit v1.2.3 From 9cdb5dbf79f4e8f43e19ab7f4ec9ed74c146f0af Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 5 Nov 2014 21:44:27 +0100 Subject: include/linux/socket.h: Fix comment File descriptors are always closed on exit :-) Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index ec538fc287a6..bb9b83640070 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -256,7 +256,7 @@ struct ucred { #define MSG_EOF MSG_FIN #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ -#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through SCM_RIGHTS */ #if defined(CONFIG_COMPAT) -- cgit v1.2.3 From c16561e8df7a64764ef61f02221e98273add325a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 6 Nov 2014 00:37:08 +0100 Subject: PM / Domains: Change prototype for the attach and detach callbacks Convert the prototypes to return an int in order to support error handling in these callbacks. Also, as suggested by Dmitry Torokhov, pass the domain pointer for use inside the callbacks, and so that they match the existing power_on/power_off callbacks which currently take the domain pointer. Acked-by: Dmitry Torokhov Acked-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson [ khilman: added domain as parameter to callbacks, as suggested by Dmitry ] Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 73e938b7e937..b3ed7766a291 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,10 @@ struct generic_pm_domain { bool max_off_time_changed; bool cached_power_down_ok; struct gpd_cpuidle_data *cpuidle_data; - void (*attach_dev)(struct device *dev); - void (*detach_dev)(struct device *dev); + int (*attach_dev)(struct generic_pm_domain *domain, + struct device *dev); + void (*detach_dev)(struct generic_pm_domain *domain, + struct device *dev); }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) -- cgit v1.2.3 From c0acb8144bd6d8d88aee1dab33364b7353e9a903 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Oct 2014 12:48:35 +0200 Subject: mfd: max77693: Fix always masked MUIC interrupts All interrupts coming from MUIC were ignored because interrupt source register was masked. The Maxim 77693 has a "interrupt source" - a separate register and interrupts which give information about PMIC block triggering the individual interrupt (charger, topsys, MUIC, flash LED). By default bootloader could initialize this register to "mask all" value. In such case (observed on Trats2 board) MUIC interrupts won't be generated regardless of their mask status. Regmap irq chip was unmasking individual MUIC interrupts but the source was masked Before introducing regmap irq chip this interrupt source was unmasked, read and acked. Reading and acking is not necessary but unmasking is. Fixes: 342d669c1ee4 ("mfd: max77693: Handle IRQs using regmap") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index fc17d56581b2..582e67f34054 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -330,6 +330,13 @@ enum max77693_irq_source { MAX77693_IRQ_GROUP_NR, }; +#define SRC_IRQ_CHARGER BIT(0) +#define SRC_IRQ_TOP BIT(1) +#define SRC_IRQ_FLASH BIT(2) +#define SRC_IRQ_MUIC BIT(3) +#define SRC_IRQ_ALL (SRC_IRQ_CHARGER | SRC_IRQ_TOP \ + | SRC_IRQ_FLASH | SRC_IRQ_MUIC) + #define LED_IRQ_FLED2_OPEN BIT(0) #define LED_IRQ_FLED2_SHORT BIT(1) #define LED_IRQ_FLED1_OPEN BIT(2) -- cgit v1.2.3 From e30f53aad2202b5526c40c36d8eeac8bf290bde5 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 10 Nov 2014 19:46:34 +0100 Subject: tracing: Do not busy wait in buffer splice On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [] ? __wake_up_common+0x90/0x90 [] wait_on_pipe+0x35/0x40 [] tracing_buffers_splice_read+0x2e3/0x3c0 [] ? tracing_stats_read+0x2a0/0x2a0 [] ? _raw_spin_unlock+0x2b/0x40 [] ? do_read_fault+0x21b/0x290 [] ? handle_mm_fault+0x2ba/0xbd0 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] ? trace_buffer_lock_reserve+0x22/0x60 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] do_splice_to+0x6d/0x90 [] SyS_splice+0x7c1/0x800 [] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in Cc: stable@vger.kernel.org # 3.16+ Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 49a4d6f59108..e2c13cd863bd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); -- cgit v1.2.3 From 67732cd34382066ae5df313b6dad65ab14b9735f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 11 Nov 2014 11:07:08 +0100 Subject: PM / Domains: Fix initial default state of the need_restore flag The initial state of the device's need_restore flag should'nt depend on the current state of the PM domain. For example it should be perfectly valid to attach an inactive device to a powered PM domain. The pm_genpd_dev_need_restore() API allow us to update the need_restore flag to somewhat cope with such scenarios. Typically that should have been done from drivers/buses ->probe() since it's those that put the requirements on the value of the need_restore flag. Until recently, the Exynos SOCs were the only user of the pm_genpd_dev_need_restore() API, though invoking it from a centralized location while adding devices to their PM domains. Due to that Exynos now have swithed to the generic OF-based PM domain look-up, it's no longer possible to invoke the API from a centralized location. The reason is because devices are now added to their PM domains during the probe sequence. Commit "ARM: exynos: Move to generic PM domain DT bindings" did the switch for Exynos to the generic OF-based PM domain look-up, but it also removed the call to pm_genpd_dev_need_restore(). This caused a regression for some of the Exynos drivers. To handle things more properly in the generic PM domain, let's change the default initial value of the need_restore flag to reflect that the state is unknown. As soon as some of the runtime PM callbacks gets invoked, update the initial value accordingly. Moreover, since the generic PM domain is verifying that all devices are both runtime PM enabled and suspended, using pm_runtime_suspended() while pm_genpd_poweroff() is invoked from the scheduled work, we can be sure of that the PM domain won't be powering off while having active devices. Do note that, the generic PM domain can still only know about active devices which has been activated through invoking its runtime PM resume callback. In other words, buses/drivers using pm_runtime_set_active() during ->probe() will still suffer from a race condition, potentially probing a device without having its PM domain being powered. That issue will have to be solved using a different approach. This a log from the boot regression for Exynos5, which is being fixed in this patch. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 308 at ../drivers/clk/clk.c:851 clk_disable+0x24/0x30() Modules linked in: CPU: 0 PID: 308 Comm: kworker/0:1 Not tainted 3.18.0-rc3-00569-gbd9449f-dirty #10 Workqueue: pm pm_runtime_work [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (warn_slowpath_common+0x64/0x88) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (clk_disable+0x24/0x30) [] (clk_disable) from [] (gsc_runtime_suspend+0x128/0x160) [] (gsc_runtime_suspend) from [] (pm_generic_runtime_suspend+0x2c/0x38) [] (pm_generic_runtime_suspend) from [] (pm_genpd_default_save_state+0x2c/0x8c) [] (pm_genpd_default_save_state) from [] (pm_genpd_poweroff+0x224/0x3ec) [] (pm_genpd_poweroff) from [] (pm_genpd_runtime_suspend+0x9c/0xcc) [] (pm_genpd_runtime_suspend) from [] (__rpm_callback+0x2c/0x60) [] (__rpm_callback) from [] (rpm_callback+0x20/0x74) [] (rpm_callback) from [] (rpm_suspend+0xd4/0x43c) [] (rpm_suspend) from [] (pm_runtime_work+0x80/0x90) [] (pm_runtime_work) from [] (process_one_work+0x12c/0x314) [] (process_one_work) from [] (worker_thread+0x3c/0x4b0) [] (worker_thread) from [] (kthread+0xcc/0xe8) [] (kthread) from [] (ret_from_fork+0x14/0x3c) ---[ end trace 40cd58bcd6988f12 ]--- Fixes: a4a8c2c4962bb655 (ARM: exynos: Move to generic PM domain DT bindings) Reported-and-tested0by: Sylwester Nawrocki Reviewed-by: Sylwester Nawrocki Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b3ed7766a291..2e0e06daf8c0 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -106,7 +106,7 @@ struct generic_pm_domain_data { struct notifier_block nb; struct mutex lock; unsigned int refcount; - bool need_restore; + int need_restore; }; #ifdef CONFIG_PM_GENERIC_DOMAINS -- cgit v1.2.3 From 8c393f9a721c30a030049a680e1bf896669bb279 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 5 Nov 2014 22:36:50 +0800 Subject: nfs: fix pnfs direct write memory leak For pNFS direct writes, layout driver may dynamically allocate ds_cinfo.buckets. So we need to take care to free them when freeing dreq. Ideally this needs to be done inside layout driver where ds_cinfo.buckets are allocated. But buckets are attached to dreq and reused across LD IO iterations. So I feel it's OK to free them in the generic layer. Cc: stable@vger.kernel.org [v3.4+] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 983876f24aed..47ebb4fafd87 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1224,11 +1224,22 @@ struct nfs41_free_stateid_res { unsigned int status; }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ + kfree(cinfo->buckets); +} + #else struct pnfs_ds_commit_info { }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 -- cgit v1.2.3 From ad53f92eb416d81e469fa8ea57153e59455e7175 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:11 -0800 Subject: mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype Before describing bugs itself, I first explain definition of freepage. 1. pages on buddy list are counted as freepage. 2. pages on isolate migratetype buddy list are *not* counted as freepage. 3. pages on cma buddy list are counted as CMA freepage, too. Now, I describe problems and related patch. Patch 1: There is race conditions on getting pageblock migratetype that it results in misplacement of freepages on buddy list, incorrect freepage count and un-availability of freepage. Patch 2: Freepages on pcp list could have stale cached information to determine migratetype of buddy list to go. This causes misplacement of freepages on buddy list and incorrect freepage count. Patch 4: Merging between freepages on different migratetype of pageblocks will cause freepages accouting problem. This patch fixes it. Without patchset [3], above problem doesn't happens on my CMA allocation test, because CMA reserved pages aren't used at all. So there is no chance for above race. With patchset [3], I did simple CMA allocation test and get below result: - Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation - run kernel build (make -j16) on background - 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval - Result: more than 5000 freepage count are missed With patchset [3] and this patchset, I found that no freepage count are missed so that I conclude that problems are solved. On my simple memory offlining test, these problems also occur on that environment, too. This patch (of 4): There are two paths to reach core free function of buddy allocator, __free_one_page(), one is free_one_page()->__free_one_page() and the other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page(). Each paths has race condition causing serious problems. At first, this patch is focused on first type of freepath. And then, following patch will solve the problem in second type of freepath. In the first type of freepath, we got migratetype of freeing page without holding the zone lock, so it could be racy. There are two cases of this race. 1. pages are added to isolate buddy list after restoring orignal migratetype CPU1 CPU2 get migratetype => return MIGRATE_ISOLATE call free_one_page() with MIGRATE_ISOLATE grab the zone lock unisolate pageblock release the zone lock grab the zone lock call __free_one_page() with MIGRATE_ISOLATE freepage go into isolate buddy list, although pageblock is already unisolated This may cause two problems. One is that we can't use this page anymore until next isolation attempt of this pageblock, because freepage is on isolate buddy list. The other is that freepage accouting could be wrong due to merging between different buddy list. Freepages on isolate buddy list aren't counted as freepage, but ones on normal buddy list are counted as freepage. If merge happens, buddy freepage on normal buddy list is inevitably moved to isolate buddy list without any consideration of freepage accouting so it could be incorrect. 2. pages are added to normal buddy list while pageblock is isolated. It is similar with above case. This also may cause two problems. One is that we can't keep these freepages from being allocated. Although this pageblock is isolated, freepage would be added to normal buddy list so that it could be allocated without any restriction. And the other problem is same as case 1, that it, incorrect freepage accouting. This race condition would be prevented by checking migratetype again with holding the zone lock. Because it is somewhat heavy operation and it isn't needed in common case, we want to avoid rechecking as much as possible. So this patch introduce new variable, nr_isolate_pageblock in struct zone to check if there is isolated pageblock. With this, we can avoid to re-check migratetype in common case and do it only if there is isolated pageblock or migratetype is MIGRATE_ISOLATE. This solve above mentioned problems. Changes from v3: Add one more check in free_one_page() that checks whether migratetype is MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens. Signed-off-by: Joonsoo Kim Acked-by: Minchan Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 +++++++++ include/linux/page-isolation.h | 8 ++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 48bf12ef6620..ffe66e381c04 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -431,6 +431,15 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Number of isolated pageblock. It is used to solve incorrect + * freepage counting problem due to racy retrieving migratetype + * of pageblock. Protected by zone->lock. + */ + unsigned long nr_isolate_pageblock; +#endif + #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3fff8e774067..2dc1e1697b45 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -2,6 +2,10 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return zone->nr_isolate_pageblock; +} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return false; +} static inline bool is_migrate_isolate_page(struct page *page) { return false; -- cgit v1.2.3 From f784a3f19613901ca4539a5b0eed3bdc700e6ee7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 13 Nov 2014 15:19:39 -0800 Subject: mem-hotplug: reset node managed pages when hot-adding a new pgdat In free_area_init_core(), zone->managed_pages is set to an approximate value for lowmem, and will be adjusted when the bootmem allocator frees pages into the buddy system. But free_area_init_core() is also called by hotadd_new_pgdat() when hot-adding memory. As a result, zone->managed_pages of the newly added node's pgdat is set to an approximate value in the very beginning. Even if the memory on that node has node been onlined, /sys/device/system/node/nodeXXX/meminfo has wrong value: hot-add node2 (memory not onlined) cat /sys/device/system/node/node2/meminfo Node 2 MemTotal: 33554432 kB Node 2 MemFree: 0 kB Node 2 MemUsed: 33554432 kB Node 2 Active: 0 kB This patch fixes this problem by reset node managed pages to 0 after hot-adding a new node. 1. Move reset_managed_pages_done from reset_node_managed_pages() to reset_all_zones_managed_pages() 2. Make reset_node_managed_pages() non-static 3. Call reset_node_managed_pages() in hotadd_new_pgdat() after pgdat is initialized Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu Cc: [3.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4e2bd4c95b66..0995c2de8162 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long free_all_bootmem(void); +extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); extern void free_bootmem_node(pg_data_t *pgdat, -- cgit v1.2.3 From 84bc88688e3f6ef843aa8803dbcd90168bb89faf Mon Sep 17 00:00:00 2001 From: Vincent BENAYOUN Date: Thu, 13 Nov 2014 13:47:26 +0100 Subject: inetdevice: fixed signed integer overflow There could be a signed overflow in the following code. The expression, (32-logmask) is comprised between 0 and 31 included. It may be equal to 31. In such a case the left shift will produce a signed integer overflow. According to the C99 Standard, this is an undefined behavior. A simple fix is to replace the signed int 1 with the unsigned int 1U. Signed-off-by: Vincent BENAYOUN Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0068708161ff..0a21fbefdfbe 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -242,7 +242,7 @@ static inline void in_dev_put(struct in_device *idev) static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) - return htonl(~((1<<(32-logmask))-1)); + return htonl(~((1U<<(32-logmask))-1)); return 0; } -- cgit v1.2.3 From ccf54555da9a5e91e454b909ca6a5303c7d6b910 Mon Sep 17 00:00:00 2001 From: Cristina Ciocan Date: Tue, 11 Nov 2014 16:07:42 +0200 Subject: iio: Fix IIO_EVENT_CODE_EXTRACT_DIR bit mask The direction field is set on 7 bits, thus we need to AND it with 0111 111 mask in order to retrieve it, that is 0x7F, not 0xCF as it is now. Fixes: ade7ef7ba (staging:iio: Differential channel handling) Signed-off-by: Cristina Ciocan Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h index 8bbd7bc1043d..03fa332ad2a8 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -72,7 +72,7 @@ struct iio_event_data { #define IIO_EVENT_CODE_EXTRACT_TYPE(mask) ((mask >> 56) & 0xFF) -#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0xCF) +#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0x7F) #define IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(mask) ((mask >> 32) & 0xFF) -- cgit v1.2.3 From 00b4d9a14125f1e51874def2b9de6092e007412d Mon Sep 17 00:00:00 2001 From: Maxime COQUELIN Date: Thu, 6 Nov 2014 10:54:19 +0100 Subject: bitops: Fix shift overflow in GENMASK macros On some 32 bits architectures, including x86, GENMASK(31, 0) returns 0 instead of the expected ~0UL. This is the same on some 64 bits architectures with GENMASK_ULL(63, 0). This is due to an overflow in the shift operand, 1 << 32 for GENMASK, 1 << 64 for GENMASK_ULL. Reported-by: Eric Paire Suggested-by: Rasmus Villemoes Signed-off-by: Maxime Coquelin Signed-off-by: Peter Zijlstra (Intel) Cc: # v3.13+ Cc: linux@rasmusvillemoes.dk Cc: gong.chen@linux.intel.com Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Theodore Ts'o Fixes: 10ef6b0dffe4 ("bitops: Introduce a more generic BITMASK macro") Link: http://lkml.kernel.org/r/1415267659-10563-1-git-send-email-maxime.coquelin@st.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index be5fd38bd5a0..5d858e02997f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -18,8 +18,11 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#define GENMASK_ULL(h, l) (((U64_C(1) << ((h) - (l) + 1)) - 1) << (l)) +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); -- cgit v1.2.3 From 23cfa361f3e54a3e184a5e126bbbdd95f984881a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Nov 2014 12:37:37 +0100 Subject: sched/cputime: Fix cpu_timer_sample_group() double accounting While looking over the cpu-timer code I found that we appear to add the delta for the calling task twice, through: cpu_timer_sample_group() thread_group_cputimer() thread_group_cputime() times->sum_exec_runtime += task_sched_runtime(); *sample = cputime.sum_exec_runtime + task_delta_exec(); Which would make the sample run ahead, making the sleep short. Signed-off-by: Peter Zijlstra (Intel) Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Stanislaw Gruszka Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Rik van Riel Cc: Tejun Heo Link: http://lkml.kernel.org/r/20141112113737.GI10476@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8422b4ed6882..b9376cd5a187 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -77,11 +77,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -/* - * Lock/unlock the current runqueue - to extract task statistics: - */ -extern unsigned long long task_delta_exec(struct task_struct *); - extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); -- cgit v1.2.3 From e6d5e7d90be92cee626d7ec16ca9b06f1eed710b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 14 Nov 2014 15:32:09 +0000 Subject: clk-divider: Fix READ_ONLY when divider > 1 Commit 79c6ab509558 (clk: divider: add CLK_DIVIDER_READ_ONLY flag) in v3.16 introduced the CLK_DIVIDER_READ_ONLY flag which caused the recalc_rate() and round_rate() clock callbacks to be omitted. However using this flag has the unfortunate side effect of causing the clock recalculation code when a clock rate change is attempted to always treat it as a pass-through clock, i.e. with a fixed divide of 1, which may not be the case. Child clock rates are then recalculated using the wrong parent rate. Therefore instead of dropping the recalc_rate() and round_rate() callbacks, alter clk_divider_bestdiv() to always report the current divider as the best divider so that it is never altered. For me the read only clock was the system clock, which divided the PLL rate by 2, from which both the UART and the SPI clocks were divided. Initial setting of the UART rate set it correctly, but when the SPI clock was set, the other child clocks were miscalculated. The UART clock was recalculated using the PLL rate as the parent rate, resulting in a UART new_rate of double what it should be, and a UART which spewed forth garbage when the rate changes were propagated. Signed-off-by: James Hogan Cc: Thomas Abraham Cc: Tomasz Figa Cc: Max Schwarz Cc: # v3.16+ Acked-by: Haojian Zhuang Signed-off-by: Michael Turquette --- include/linux/clk-provider.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index be21af149f11..2839c639f092 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -352,7 +352,6 @@ struct clk_divider { #define CLK_DIVIDER_READ_ONLY BIT(5) extern const struct clk_ops clk_divider_ops; -extern const struct clk_ops clk_divider_ro_ops; struct clk *clk_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, -- cgit v1.2.3 From 98e69016a11b2b9398bea668442193b3b362cd43 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 7 Nov 2014 16:45:12 +0800 Subject: can: dev: add can_is_canfd_skb() API The CAN device drivers can use can_is_canfd_skb() to check if the frame to send is on CAN FD mode or normal CAN mode. Acked-by: Oliver Hartkopp Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 6992afc6ba7f..b37ea95bc348 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -99,6 +99,12 @@ inval_skb: return 1; } +static inline bool can_is_canfd_skb(const struct sk_buff *skb) +{ + /* the CAN specific type of skb is identified by its data length */ + return skb->len == CANFD_MTU; +} + /* get data length from can_dlc with sanitized can_dlc */ u8 can_dlc2len(u8 can_dlc); -- cgit v1.2.3 From 4aab3b5b3ccf94fc907e66233e6ca4d8675759a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 22 Nov 2014 09:22:42 -0500 Subject: percpu-ref: fix DEAD flag contamination of percpu pointer While decoupling ATOMIC and DEAD flags, f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") updated __ref_is_percpu() so that it only tests ATOMIC flag to determine whether the ref is in percpu mode or not; however, while DEAD implies ATOMIC, the two flags are set separately during percpu_ref_kill() and if __ref_is_percpu() races percpu_ref_kill(), it may see DEAD w/o ATOMIC. Because __ref_is_percpu() returns @ref->percpu_count_ptr value verbatim as the percpu pointer after testing ATOMIC, the pointer may now be contaminated with the DEAD flag. This can be fixed by clearing the flag bits before returning the pointer which was the fix proposed by Shaohua; however, as DEAD implies ATOMIC, we can just test for both flags at once and avoid the explicit masking. Update __ref_is_percpu() so that it tests that both ATOMIC and DEAD are clear before returning @ref->percpu_count_ptr as the percpu pointer. Signed-off-by: Tejun Heo Reported-and-Reviewed-by: Shaohua Li Link: http://lkml.kernel.org/r/995deb699f5b873c45d667df4add3b06f73c2c25.1416638887.git.shli@kernel.org Fixes: f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") --- include/linux/percpu-refcount.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index d5c89e0dd0e6..51ce60c35f4c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -133,7 +133,13 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) + /* + * Theoretically, the following could test just ATOMIC; however, + * then we'd have to mask off DEAD separately as DEAD may be + * visible without ATOMIC if we race with percpu_ref_kill(). DEAD + * implies ATOMIC anyway. Test them together. + */ + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; -- cgit v1.2.3 From f144d1496b47e7450f41b767d0d91c724c2198bc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 3 Oct 2014 15:13:24 +1000 Subject: PCI/MSI: Add device flag indicating that 64-bit MSIs don't work This can be set by quirks/drivers to be used by the architecture code that assigns the MSI addresses. We additionally add verification in the core MSI code that the values assigned by the architecture do satisfy the limitation in order to fail gracefully if they don't (ie. the arch hasn't been updated to deal with that quirk yet). Signed-off-by: Benjamin Herrenschmidt CC: Acked-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 5be8db45e368..4c8ac5fcc224 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -331,6 +331,7 @@ struct pci_dev { unsigned int is_added:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ + unsigned int no_64bit_msi:1; /* device may only use 32-bit MSIs */ unsigned int block_cfg_access:1; /* config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ -- cgit v1.2.3 From d3fccc7ef831d1d829b4da5eaa081db55b1e38f3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Nov 2014 09:33:56 +0100 Subject: kvm: fix kvm_is_mmio_pfn() and rename to kvm_is_reserved_pfn() This reverts commit 85c8555ff0 ("KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn()") and renames the function to kvm_is_reserved_pfn. The problem being addressed by the patch above was that some ARM code based the memory mapping attributes of a pfn on the return value of kvm_is_mmio_pfn(), whose name indeed suggests that such pfns should be mapped as device memory. However, kvm_is_mmio_pfn() doesn't do quite what it says on the tin, and the existing non-ARM users were already using it in a way which suggests that its name should probably have been 'kvm_is_reserved_pfn' from the beginning, e.g., whether or not to call get_page/put_page on it etc. This means that returning false for the zero page is a mistake and the patch above should be reverted. Signed-off-by: Ard Biesheuvel Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ea53b04993f2..a6059bdf7b03 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -703,7 +703,7 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -bool kvm_is_mmio_pfn(pfn_t pfn); +bool kvm_is_reserved_pfn(pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; -- cgit v1.2.3