From 163f9eb95a10371ead59b62087e0d4e7ce53112e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Jan 2015 20:03:40 +0000 Subject: debugfs: Provide a file creation function that also takes an initial size Provide a file creation function that also takes an initial size so that the caller doesn't have to set i_size, thus meaning that we don't have to call deal with ->d_inode in the callers. Signed-off-by: David Howells --- include/linux/debugfs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ea149a24a1f2..cb25af461054 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -51,6 +51,11 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); +struct dentry *debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size); + struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, @@ -129,6 +134,14 @@ static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { -- cgit v1.2.3 From 4282d60689d4f21b40692029080440cc58e8a17d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Jan 2015 11:36:55 -0500 Subject: tracefs: Add new tracefs file system Add a separate file system to handle the tracing directory. Currently it is part of debugfs, but that is starting to show its limits. One thing is that in order to access the tracing infrastructure, you need to mount debugfs. As that includes debugging from all sorts of sub systems in the kernel, it is not considered advisable to mount such an all encompassing debugging system. Having the tracing system in its own file systems gives access to the tracing sub system without needing to include all other systems. Another problem with tracing using the debugfs system is that the instances use mkdir to create sub buffers. debugfs does not support mkdir from userspace so to implement it, special hacks were used. By controlling the file system that the tracing infrastructure uses, this can be properly done without hacks. Signed-off-by: Steven Rostedt --- include/linux/tracefs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/tracefs.h (limited to 'include/linux') diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h new file mode 100644 index 000000000000..23e04ce21749 --- /dev/null +++ b/include/linux/tracefs.h @@ -0,0 +1,41 @@ +/* + * tracefs.h - a pseudo file system for activating tracing + * + * Based on debugfs by: 2004 Greg Kroah-Hartman + * + * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * tracefs is the file system that is used by the tracing infrastructure. + * + */ + +#ifndef _TRACEFS_H_ +#define _TRACEFS_H_ + +#include +#include + +#include + +struct file_operations; + +#ifdef CONFIG_TRACING + +struct dentry *tracefs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); + +struct dentry *tracefs_create_dir(const char *name, struct dentry *parent); + +void tracefs_remove(struct dentry *dentry); +void tracefs_remove_recursive(struct dentry *dentry); + +bool tracefs_initialized(void); + +#endif /* CONFIG_TRACING */ + +#endif -- cgit v1.2.3 From eae473581cf93dad94ca833aa961c033c6a43924 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 21 Jan 2015 10:01:39 -0500 Subject: tracing: Have mkdir and rmdir be part of tracefs The tracing "instances" directory can create sub tracing buffers with mkdir, and remove them with rmdir. As a mkdir will also create all the files and directories that control the sub buffer the inode mutexes need to be released before this is done, to avoid deadlocks. It is better to let the tracing system unlock the inode mutexes before calling the functions that create the files within the new directory (or deletes the files from the one being destroyed). Now that tracing has been converted over to tracefs, the tracefs file system can be modified to accommodate this feature. It still releases the locks, but the filesystem itself can take care of the ugly business and let the user just do what it needs. The tracing system now attaches a descriptor to the directory dentry that can have userspace create or remove sub directories. If this descriptor does not exist for a dentry, then that dentry can not be used to create other directories. This descriptor holds a mkdir and rmdir method that only takes a character string as an argument. The tracefs file system will first make a copy of the dentry name before releasing the locks. Then it will pass the copied name to the methods. It is up to the tracing system that supplied the methods to handle races with duplicate names and such as all the inode mutexes would be released when the functions are called. Cc: Al Viro Signed-off-by: Steven Rostedt --- include/linux/tracefs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 23e04ce21749..5b727a17beee 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -34,6 +34,10 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent); void tracefs_remove(struct dentry *dentry); void tracefs_remove_recursive(struct dentry *dentry); +struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, + int (*mkdir)(const char *name), + int (*rmdir)(const char *name)); + bool tracefs_initialized(void); #endif /* CONFIG_TRACING */ -- cgit v1.2.3 From 306f7aa1807be7588f115d7cafa475f65e72e3d1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Feb 2015 14:39:15 +0100 Subject: ieee802154: correct ieee802154_is_valid_psdu_len This patch corrects the ieee802154_is_valid_psdu_len function that this function also checks on reserved values 6-8 for validation the psdu length. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 6e82d888287c..40b0ab953937 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -28,7 +28,8 @@ #include #define IEEE802154_MTU 127 -#define IEEE802154_MIN_PSDU_LEN 5 +#define IEEE802154_ACK_PSDU_LEN 5 +#define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -204,11 +205,18 @@ enum { /** * ieee802154_is_valid_psdu_len - check if psdu len is valid + * available lengths: + * 0-4 Reserved + * 5 MPDU (Acknowledgment) + * 6-8 Reserved + * 9-127 MPDU + * * @len: psdu len with (MHR + payload + MFR) */ static inline bool ieee802154_is_valid_psdu_len(const u8 len) { - return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU); + return (len == IEEE802154_ACK_PSDU_LEN || + (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** -- cgit v1.2.3 From 293487c8ecc1103f4625cea5e90e1ba0cc89660f Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Tue, 10 Feb 2015 18:33:51 +0200 Subject: iio: Export userspace IIO headers After UAPI header file split [1] all user-kernel interfaces were placed under include/uapi/. This patch moves IIO user specific API from: * include/linux/iio/events.h => include/uapi/linux/iio/events.h * include/linux/types.h => include/uapi/linux/types.h Now there is no need for nasty tricks to compile userspace programs (e.g iio_event_monitor). Just installing the kernel headers with make headers_install command does the job. [1] http://lwn.net/Articles/507794/ Signed-off-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- include/linux/iio/events.h | 30 +----------------- include/linux/iio/types.h | 78 +--------------------------------------------- 2 files changed, 2 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h index 03fa332ad2a8..8ad87d1c5340 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -9,22 +9,8 @@ #ifndef _IIO_EVENTS_H_ #define _IIO_EVENTS_H_ -#include -#include #include - -/** - * struct iio_event_data - The actual event being pushed to userspace - * @id: event identifier - * @timestamp: best estimate of time of event occurrence (often from - * the interrupt handler) - */ -struct iio_event_data { - __u64 id; - __s64 timestamp; -}; - -#define IIO_GET_EVENT_FD_IOCTL _IOR('i', 0x90, int) +#include /** * IIO_EVENT_CODE() - create event identifier @@ -70,18 +56,4 @@ struct iio_event_data { #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction) \ IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) -#define IIO_EVENT_CODE_EXTRACT_TYPE(mask) ((mask >> 56) & 0xFF) - -#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0x7F) - -#define IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(mask) ((mask >> 32) & 0xFF) - -/* Event code number extraction depends on which type of event we have. - * Perhaps review this function in the future*/ -#define IIO_EVENT_CODE_EXTRACT_CHAN(mask) ((__s16)(mask & 0xFFFF)) -#define IIO_EVENT_CODE_EXTRACT_CHAN2(mask) ((__s16)(((mask) >> 16) & 0xFFFF)) - -#define IIO_EVENT_CODE_EXTRACT_MODIFIER(mask) ((mask >> 40) & 0xFF) -#define IIO_EVENT_CODE_EXTRACT_DIFF(mask) (((mask) >> 55) & 0x1) - #endif diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 580ed5bdb3fa..942b6de68e2f 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -10,76 +10,7 @@ #ifndef _IIO_TYPES_H_ #define _IIO_TYPES_H_ -enum iio_chan_type { - IIO_VOLTAGE, - IIO_CURRENT, - IIO_POWER, - IIO_ACCEL, - IIO_ANGL_VEL, - IIO_MAGN, - IIO_LIGHT, - IIO_INTENSITY, - IIO_PROXIMITY, - IIO_TEMP, - IIO_INCLI, - IIO_ROT, - IIO_ANGL, - IIO_TIMESTAMP, - IIO_CAPACITANCE, - IIO_ALTVOLTAGE, - IIO_CCT, - IIO_PRESSURE, - IIO_HUMIDITYRELATIVE, - IIO_ACTIVITY, - IIO_STEPS, - IIO_ENERGY, - IIO_DISTANCE, - IIO_VELOCITY, -}; - -enum iio_modifier { - IIO_NO_MOD, - IIO_MOD_X, - IIO_MOD_Y, - IIO_MOD_Z, - IIO_MOD_X_AND_Y, - IIO_MOD_X_AND_Z, - IIO_MOD_Y_AND_Z, - IIO_MOD_X_AND_Y_AND_Z, - IIO_MOD_X_OR_Y, - IIO_MOD_X_OR_Z, - IIO_MOD_Y_OR_Z, - IIO_MOD_X_OR_Y_OR_Z, - IIO_MOD_LIGHT_BOTH, - IIO_MOD_LIGHT_IR, - IIO_MOD_ROOT_SUM_SQUARED_X_Y, - IIO_MOD_SUM_SQUARED_X_Y_Z, - IIO_MOD_LIGHT_CLEAR, - IIO_MOD_LIGHT_RED, - IIO_MOD_LIGHT_GREEN, - IIO_MOD_LIGHT_BLUE, - IIO_MOD_QUATERNION, - IIO_MOD_TEMP_AMBIENT, - IIO_MOD_TEMP_OBJECT, - IIO_MOD_NORTH_MAGN, - IIO_MOD_NORTH_TRUE, - IIO_MOD_NORTH_MAGN_TILT_COMP, - IIO_MOD_NORTH_TRUE_TILT_COMP, - IIO_MOD_RUNNING, - IIO_MOD_JOGGING, - IIO_MOD_WALKING, - IIO_MOD_STILL, - IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z, -}; - -enum iio_event_type { - IIO_EV_TYPE_THRESH, - IIO_EV_TYPE_MAG, - IIO_EV_TYPE_ROC, - IIO_EV_TYPE_THRESH_ADAPTIVE, - IIO_EV_TYPE_MAG_ADAPTIVE, - IIO_EV_TYPE_CHANGE, -}; +#include enum iio_event_info { IIO_EV_INFO_ENABLE, @@ -88,13 +19,6 @@ enum iio_event_info { IIO_EV_INFO_PERIOD, }; -enum iio_event_direction { - IIO_EV_DIR_EITHER, - IIO_EV_DIR_RISING, - IIO_EV_DIR_FALLING, - IIO_EV_DIR_NONE, -}; - #define IIO_VAL_INT 1 #define IIO_VAL_INT_PLUS_MICRO 2 #define IIO_VAL_INT_PLUS_NANO 3 -- cgit v1.2.3 From 02cea3958664723a5d2236f0f0058de97c7e4693 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Feb 2015 14:06:23 +0100 Subject: genirq: Provide disable_hardirq() For things like netpoll there is a need to disable an interrupt from atomic context. Currently netpoll uses disable_irq() which will sleep-wait on threaded handlers and thus forced_irqthreads breaks things. Provide disable_hardirq(), which uses synchronize_hardirq() to only wait for active hardirq handlers; also change synchronize_hardirq() to return the status of threaded handlers. This will allow one to try-disable an interrupt from atomic context, or in case of request_threaded_irq() to only wait for the hardirq part. Suggested-by: Sabrina Dubroca Signed-off-by: Peter Zijlstra (Intel) Cc: Arnd Bergmann Cc: David Miller Cc: Eyal Perry Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Quentin Lambert Cc: Randy Dunlap Cc: Russell King Link: http://lkml.kernel.org/r/20150205130623.GH5029@twins.programming.kicks-ass.net [ Fixed typos and such. ] Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 2 +- include/linux/interrupt.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index cba442ec3c66..f4af03404b97 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -9,7 +9,7 @@ extern void synchronize_irq(unsigned int irq); -extern void synchronize_hardirq(unsigned int irq); +extern bool synchronize_hardirq(unsigned int irq); #if defined(CONFIG_TINY_RCU) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d9b05b5bf8c7..3bb01b9a379c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -184,6 +184,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); #endif extern void disable_irq_nosync(unsigned int irq); +extern bool disable_hardirq(unsigned int irq); extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); -- cgit v1.2.3 From bd624d75db21ea5402f9ecf4450b311794d80352 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 13 Feb 2015 08:54:56 +0800 Subject: clockevents: Introduce mode specific callbacks It is not possible for the clockevents core to know which modes (other than those with a corresponding feature flag) are supported by a particular implementation. And drivers are expected to handle transition to all modes elegantly, as ->set_mode() would be issued for them unconditionally. Now, adding support for a new mode complicates things a bit if we want to use the legacy ->set_mode() callback. We need to closely review all clockevents drivers to see if they would break on addition of a new mode. And after such reviews, it is found that we have to do non-trivial changes to most of the drivers [1]. Introduce mode-specific set_mode_*() callbacks, some of which the drivers may or may not implement. A missing callback would clearly convey the message that the corresponding mode isn't supported. A driver may still choose to keep supporting the legacy ->set_mode() callback, but ->set_mode() wouldn't be supporting any new modes beyond RESUME. If a driver wants to benefit from using a new mode, it would be required to migrate to the mode specific callbacks. The legacy ->set_mode() callback and the newly introduced mode-specific callbacks are mutually exclusive. Only one of them should be supported by the driver. Sanity check is done at the time of registration to distinguish between optional and required callbacks and to make error recovery and handling simpler. If the legacy ->set_mode() callback is provided, all mode specific ones would be ignored by the core but a warning is thrown if they are present. Call sites calling ->set_mode() directly are also updated to use __clockevents_set_mode() instead, as ->set_mode() may not be available anymore for few drivers. [1] https://lkml.org/lkml/2014/12/9/605 [2] https://lkml.org/lkml/2015/1/23/255 Suggested-by: Thomas Gleixner [2] Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: John Stultz Cc: Kevin Hilman Cc: Linus Torvalds Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Link: http://lkml.kernel.org/r/792d59a40423f0acffc9bb0bec9de1341a06fa02.1423788565.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 2e4cb67f6e56..59af26b54d15 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -39,6 +39,8 @@ enum clock_event_mode { CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, CLOCK_EVT_MODE_RESUME, + + /* Legacy ->set_mode() callback doesn't support below modes */ }; /* @@ -81,7 +83,11 @@ enum clock_event_mode { * @mode: operating mode assigned by the management code * @features: features * @retries: number of forced programming retries - * @set_mode: set mode function + * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. + * @set_mode_periodic: switch mode to periodic, if !set_mode + * @set_mode_oneshot: switch mode to oneshot, if !set_mode + * @set_mode_shutdown: switch mode to shutdown, if !set_mode + * @set_mode_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -108,9 +114,20 @@ struct clock_event_device { unsigned int features; unsigned long retries; - void (*broadcast)(const struct cpumask *mask); + /* + * Mode transition callback(s): Only one of the two groups should be + * defined: + * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. + * - set_mode_{shutdown|periodic|oneshot|resume}(). + */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + int (*set_mode_periodic)(struct clock_event_device *); + int (*set_mode_oneshot)(struct clock_event_device *); + int (*set_mode_shutdown)(struct clock_event_device *); + int (*set_mode_resume)(struct clock_event_device *); + + void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; -- cgit v1.2.3 From ba532500c5651a4be4108acc64ed99a95cb005b3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:58 -0500 Subject: perf: Introduce pmu context switch callback The callback is invoked when process is scheduled in or out. It provides mechanism for later patches to save/store the LBR stack. For the schedule in case, the callback is invoked at the same place that flush branch stack callback is invoked. So it also can replace the flush branch stack callback. To avoid unnecessary overhead, the callback is enabled only when there are events use the LBR stack. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 33262004c310..fbab6235d053 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -265,6 +265,13 @@ struct pmu { * flush branch stack on context-switches (needed in cpu-wide mode) */ void (*flush_branch_stack) (void); + + /* + * context-switches callback + */ + void (*sched_task) (struct perf_event_context *ctx, + bool sched_in); + }; /** @@ -558,6 +565,8 @@ extern void perf_event_delayed_put(struct task_struct *task); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); +extern void perf_sched_cb_dec(struct pmu *pmu); +extern void perf_sched_cb_inc(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern int perf_event_refresh(struct perf_event *event, int refresh); -- cgit v1.2.3 From 2a0ad3b326a9024ba86dca4028499d31fa0c6c4d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:59 -0500 Subject: perf/x86/intel: Use context switch callback to flush LBR stack Previous commit introduces context switch callback, its function overlaps with the flush branch stack callback. So we can use the context switch callback to flush LBR stack. This patch adds code that uses the flush branch callback to flush the LBR stack when task is being scheduled in. The callback is enabled only when there are events use the LBR hardware. This patch also removes all old flush branch stack code. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-4-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fbab6235d053..c7007a564440 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -511,7 +511,6 @@ struct perf_event_context { u64 generation; int pin_count; int nr_cgroups; /* cgroup evts */ - int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; struct delayed_work orphans_remove; -- cgit v1.2.3 From 4af57ef28c2c1047fda9e1a5be02aa7a6a69cf9e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:01 -0500 Subject: perf: Add pmu specific data for perf task context Introduce a new flag PERF_ATTACH_TASK_DATA for perf event's attach stata. The flag is set by PMU's event_init() callback, it indicates that perf event needs PMU specific data. The PMU specific data are initialized to zeros. Later patches will use PMU specific data to save LBR stack. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-6-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c7007a564440..270cd0173e61 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -271,6 +271,10 @@ struct pmu { */ void (*sched_task) (struct perf_event_context *ctx, bool sched_in); + /* + * PMU specific data size + */ + size_t task_ctx_size; }; @@ -307,6 +311,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 +#define PERF_ATTACH_TASK_DATA 0x08 struct perf_cgroup; struct ring_buffer; @@ -511,6 +516,7 @@ struct perf_event_context { u64 generation; int pin_count; int nr_cgroups; /* cgroup evts */ + void *task_ctx_data; /* pmu specific data */ struct rcu_head rcu_head; struct delayed_work orphans_remove; -- cgit v1.2.3 From a46a23000198d929391aa9dac8de68734efa2703 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:06 -0500 Subject: perf: Simplify the branch stack check Use event->attr.branch_sample_type to replace intel_pmu_needs_lbr_smpl() for avoiding duplicated code that implicitly enables the LBR. Currently, branch stack can be enabled by user explicitly requesting branch sampling or implicit branch sampling to correct PEBS skid. For user explicitly requested branch sampling, the branch_sample_type is explicitly set by user. For PEBS case, the branch_sample_type is also implicitly set to PERF_SAMPLE_BRANCH_ANY in x86_pmu_hw_config. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-11-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 270cd0173e61..43cc158487e6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -814,6 +814,11 @@ static inline bool has_branch_stack(struct perf_event *event) return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; } +static inline bool needs_branch_stack(struct perf_event *event) +{ + return event->attr.branch_sample_type != 0; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -- cgit v1.2.3 From acba3c7e4652ca5fcb2fd9376d58c2dffd8ddf2a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2015 14:15:39 +0100 Subject: perf, powerpc: Fix up flush_branch_stack() users The recent LBR rework for x86 left a stray flush_branch_stack() user in the PowerPC code, fix that up. Signed-off-by: Peter Zijlstra (Intel) Cc: Anshuman Khandual Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Cc: Joel Stanley Cc: Linus Torvalds Cc: Michael Ellerman Cc: Michael Neuling Cc: Paul Mackerras Cc: Tejun Heo Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 43cc158487e6..724d3720c9b1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -261,11 +261,6 @@ struct pmu { */ int (*event_idx) (struct perf_event *event); /*optional */ - /* - * flush branch stack on context-switches (needed in cpu-wide mode) - */ - void (*flush_branch_stack) (void); - /* * context-switches callback */ -- cgit v1.2.3 From 4421f8f0fa02bc982b410cd773223cc280791c54 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Wed, 18 Feb 2015 15:21:07 +0100 Subject: livepatch: remove extern specifier from header files Storage-class specifier 'extern' is redundant in front of the function declaration. According to the C specification it has the same meaning as if not present at all. So remove it. Signed-off-by: Miroslav Benes Acked-by: Josh Poimboeuf Reviewed-by: Masami Hiramatsu Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 95023fd8b00d..ee6dbb39a809 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -123,10 +123,10 @@ struct klp_patch { enum klp_state state; }; -extern int klp_register_patch(struct klp_patch *); -extern int klp_unregister_patch(struct klp_patch *); -extern int klp_enable_patch(struct klp_patch *); -extern int klp_disable_patch(struct klp_patch *); +int klp_register_patch(struct klp_patch *); +int klp_unregister_patch(struct klp_patch *); +int klp_enable_patch(struct klp_patch *); +int klp_disable_patch(struct klp_patch *); #endif /* CONFIG_LIVEPATCH */ -- cgit v1.2.3 From db2dcb4f91d5fec5c346a82c309187ee821e2495 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 20 Jan 2015 13:00:46 +0000 Subject: irqflags: Fix (at least latent) code generation issue The conditional in local_irq_restore() otherwise can cause code bloat (the if and else blocks may get translated into separate code paths despite the generated code being identical, dependent on compiler internal heuristics). Note that this adjustment gets the code in sync with the comment preceding it (which was slightly wrong from at least from 2.6.37 onwards). The code bloat was observed in reality with an experimental x86 patch. Signed-off-by: Jan Beulich Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/54BE5F8E02000078000570A7@mail.emea.novell.com Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d176d658fe25..5dd1272d1ab2 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -85,7 +85,7 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#ifdef CONFIG_TRACE_IRQFLAGS #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -107,22 +107,6 @@ raw_local_irq_restore(flags); \ } \ } while (0) -#define local_save_flags(flags) \ - do { \ - raw_local_save_flags(flags); \ - } while (0) - -#define irqs_disabled_flags(flags) \ - ({ \ - raw_irqs_disabled_flags(flags); \ - }) - -#define irqs_disabled() \ - ({ \ - unsigned long _flags; \ - raw_local_save_flags(_flags); \ - raw_irqs_disabled_flags(_flags); \ - }) #define safe_halt() \ do { \ @@ -131,7 +115,7 @@ } while (0) -#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#else /* !CONFIG_TRACE_IRQFLAGS */ #define local_irq_enable() do { raw_local_irq_enable(); } while (0) #define local_irq_disable() do { raw_local_irq_disable(); } while (0) @@ -140,11 +124,28 @@ raw_local_irq_save(flags); \ } while (0) #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) -#define local_save_flags(flags) do { raw_local_save_flags(flags); } while (0) -#define irqs_disabled() (raw_irqs_disabled()) -#define irqs_disabled_flags(flags) (raw_irqs_disabled_flags(flags)) #define safe_halt() do { raw_safe_halt(); } while (0) +#endif /* CONFIG_TRACE_IRQFLAGS */ + +#define local_save_flags(flags) raw_local_save_flags(flags) + +/* + * Some architectures don't define arch_irqs_disabled(), so even if either + * definition would be fine we need to use different ones for the time being + * to avoid build issues. + */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#define irqs_disabled() \ + ({ \ + unsigned long _flags; \ + raw_local_save_flags(_flags); \ + raw_irqs_disabled_flags(_flags); \ + }) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#define irqs_disabled() raw_irqs_disabled() #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) + #endif -- cgit v1.2.3 From 959d10f6bbf6ab5b8813c4e37540a2e43ca2ae96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Feb 2015 03:19:24 -0800 Subject: igmp: add __ip_mc_{join|leave}_group() There is a need to perform igmp join/leave operations while RTNL is held. Make ip_mc_{join|leave}_group() wrappers around __ip_mc_{join|leave}_group() to avoid the proliferation of work queues. For example, vxlan_igmp_join() could possibly be removed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..b5a6470e686c 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,7 +111,9 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); +extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); +extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, -- cgit v1.2.3 From 059a2440fd3cf4ec57735db2c0a90401cde84fca Mon Sep 17 00:00:00 2001 From: Bojan Prtvar Date: Sun, 22 Feb 2015 11:46:35 +0100 Subject: net: Remove state argument from skb_find_text() Although it is clear that textsearch state is intentionally passed to skb_find_text() as uninitialized argument, it was never used by the callers. Therefore, we can simplify skb_find_text() by making it local variable. Signed-off-by: Bojan Prtvar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..d898b32dedcc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -870,8 +870,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, void skb_abort_seq_read(struct skb_seq_state *st); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); + unsigned int to, struct ts_config *config); /* * Packet hash types specify the type of hash in skb_set_hash. -- cgit v1.2.3 From ee8905963ed0bc9dfc0952dc35e16e233c10e212 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 14 Apr 2014 17:53:25 +0200 Subject: of: Add for_each_endpoint_of_node helper macro Note that while of_graph_get_next_endpoint decrements the reference count of the child node passed to it, of_node_put(child) still has to be called manually when breaking out of the loop. Signed-off-by: Philipp Zabel Acked-by: Laurent Pinchart --- include/linux/of_graph.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index befef42e015b..e43442efcbe5 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -26,6 +26,17 @@ struct of_endpoint { const struct device_node *local_node; }; +/** + * for_each_endpoint_of_node - iterate over every endpoint in a device node + * @parent: parent device node containing ports and endpoints + * @child: loop variable pointing to the current endpoint node + * + * When breaking out of the loop, of_node_put(child) has to be called manually. + */ +#define for_each_endpoint_of_node(parent, child) \ + for (child = of_graph_get_next_endpoint(parent, NULL); child != NULL; \ + child = of_graph_get_next_endpoint(parent, child)) + #ifdef CONFIG_OF int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); -- cgit v1.2.3 From 411fdaf846afb0be1b54383c184f58a42fa416ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Feb 2015 01:46:49 +0000 Subject: dmaengine: shdma: use normal interface for passing slave id in dma_slave_config, which is incompatible with the way that the dmaengine API normally works. I've had a closer look at the existing code now and found that all slave drivers that pass a slave_id in dma_slave_config for SH do that right after passing the same ID into shdma_chan_filter, so we can just rely on that. However, the various shdma drivers currently do not remember the slave ID that was passed into the filter function when used in non-DT mode and only check the value to find a matching channel, unlike all other drivers. There might still be drivers that are not part of the kernel that rely on setting the slave_id to some other value, so to be on the safe side, this adds another 'real_slave_id' field to shdma_chan that remembers the ID and uses it when a driver passes a zero slave_id in dma_slave_config, like most drivers do. Eventually, the real_slave_id and slave_id fields should just get merged into one field, but that requires other changes. Signed-off-by: Arnd Bergmann Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- include/linux/shdma-base.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index abdf1f229dc3..dd0ba502ccb3 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -69,6 +69,7 @@ struct shdma_chan { int id; /* Raw id of this channel */ int irq; /* Channel IRQ */ int slave_id; /* Client ID for slave DMA */ + int real_slave_id; /* argument passed to filter function */ int hw_req; /* DMA request line for slave DMA - same * as MID/RID, used with DT */ enum shdma_pm_state pm_state; -- cgit v1.2.3 From bfe446e37c4efd8ade454911e8f80414bcbfc10d Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 11 Mar 2014 11:21:11 +0100 Subject: of: Add of_graph_get_port_by_id function This patch adds a function to get a port device tree node by port id, or reg property value. Signed-off-by: Philipp Zabel Acked-by: Laurent Pinchart --- include/linux/of_graph.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index e43442efcbe5..3c1c95a39e0c 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -40,6 +40,7 @@ struct of_endpoint { #ifdef CONFIG_OF int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); +struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); struct device_node *of_graph_get_remote_port_parent( @@ -53,6 +54,12 @@ static inline int of_graph_parse_endpoint(const struct device_node *node, return -ENOSYS; } +static inline struct device_node *of_graph_get_port_by_id( + struct device_node *node, u32 id) +{ + return NULL; +} + static inline struct device_node *of_graph_get_next_endpoint( const struct device_node *parent, struct device_node *previous) -- cgit v1.2.3 From e651a1da442ae02a50081e38309dea5e89da2d41 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 19 Feb 2015 15:31:25 -0800 Subject: HID: hid-sensor-hub: Allow parallel synchronous reads Current implementation only allows one outstanding synchronous read. This is a performance hit when user mode is requesting raw reads of sensor attributes on multiple sensors together. This change changes the mutex lock to per hid sensor hub device instead of global lock. Although request to hid sensor hub is serialized, there can be multiple outstanding read requests pending for responses via hid reports. Signed-off-by: Srinivas Pandruvada Acked-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-hub.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 51f7ccadf923..60a34277ddf2 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -46,20 +46,44 @@ struct hid_sensor_hub_attribute_info { s32 logical_maximum; }; +/** + * struct sensor_hub_pending - Synchronous read pending information + * @status: Pending status true/false. + * @ready: Completion synchronization data. + * @usage_id: Usage id for physical device, E.g. Gyro usage id. + * @attr_usage_id: Usage Id of a field, E.g. X-AXIS for a gyro. + * @raw_size: Response size for a read request. + * @raw_data: Place holder for received response. + */ +struct sensor_hub_pending { + bool status; + struct completion ready; + u32 usage_id; + u32 attr_usage_id; + int raw_size; + u8 *raw_data; +}; + /** * struct hid_sensor_hub_device - Stores the hub instance data * @hdev: Stores the hid instance. * @vendor_id: Vendor id of hub device. * @product_id: Product id of hub device. + * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection + * @mutex: synchronizing mutex. + * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { struct hid_device *hdev; u32 vendor_id; u32 product_id; + u32 usage; int start_collection_index; int end_collection_index; + struct mutex mutex; + struct sensor_hub_pending pending; }; /** -- cgit v1.2.3 From cb67126f32f008b9abe97fbfca9b23a797b2458a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 19 Feb 2015 15:31:26 -0800 Subject: HID: hid-sensor-hub: Add support for application collection Section 4.2.5 of HID Sensor hub specification allows two methods defining sensor devices. - Each sensor device by its own collection - A top level application collection object, including multiple sensors. In the first method, each sensor can be in its own sensor application collection without a physical collection. In the second method there is a usage id for collection type, which is defined as an application collection, with multiple physical collections in it. It is possible to define fusion sensor with this and may have its own handler. If there is a callback registered for the collection type, then forward all reports for sensors in its collection to this handler. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 109f0e633e01..f2ee90aed0c2 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,8 @@ #define HID_MAX_PHY_DEVICES 0xFF +#define HID_USAGE_SENSOR_COLLECTION 0x200001 + /* Accel 3D (200073) */ #define HID_USAGE_SENSOR_ACCEL_3D 0x200073 #define HID_USAGE_SENSOR_DATA_ACCELERATION 0x200452 -- cgit v1.2.3 From 8f4490e09694efaf7fe60ac6a1135530aa8c05ad Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 11 Feb 2015 19:39:12 -0800 Subject: regulator: core: Introduce set_load op Expose the requested load directly to the regulator implementation for hardware that does not support the normal enum based set_mode(). Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..8a0165f22f0a 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -98,6 +98,7 @@ struct regulator_linear_range { * REGULATOR_STATUS value (or negative errno) * @get_optimum_mode: Get the most efficient operating mode for the regulator * when running with the specified parameters. + * @set_load: Set the load for the regulator. * * @set_bypass: Set the regulator in bypass mode. * @get_bypass: Get the regulator bypass mode state. @@ -167,6 +168,8 @@ struct regulator_ops { /* get most efficient regulator operating mode for load */ unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV, int output_uV, int load_uA); + /* set the load on the regulator */ + int (*set_load)(struct regulator_dev *, int load_uA); /* control and report on bypass mode */ int (*set_bypass)(struct regulator_dev *dev, bool enable); -- cgit v1.2.3 From b3f4737d00de317d1549d5cb5b1dad90e19f5cec Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 19 Feb 2015 15:33:56 -0800 Subject: HID: hid-sensor-hub: Extend API for async reads Add additional flag to read in async mode. In this mode the caller will get reply via registered callback for capture_sample. Callbacks can be registered using sensor_hub_register_callback function. The usage id parameter of the capture_sample can be matched with the usage id of the requested attribute. Signed-off-by: Srinivas Pandruvada Acked-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-hub.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 60a34277ddf2..4c49b041922d 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -169,19 +169,27 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev, struct hid_sensor_hub_attribute_info *info); /** -* sensor_hub_input_attr_get_raw_value() - Synchronous read request +* sensor_hub_input_attr_get_raw_value() - Attribute read request * @usage_id: Attribute usage id of parent physical device as per spec * @attr_usage_id: Attribute usage id as per spec * @report_id: Report id to look for +* @flag: Synchronous or asynchronous read * -* Issues a synchronous read request for an input attribute. Returns -* data upto 32 bits. Since client can get events, so this call should -* not be used for data paths, this will impact performance. +* Issues a synchronous or asynchronous read request for an input attribute. +* Returns data upto 32 bits. */ +enum sensor_hub_read_flags { + SENSOR_HUB_SYNC, + SENSOR_HUB_ASYNC, +}; + int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, - u32 usage_id, - u32 attr_usage_id, u32 report_id); + u32 usage_id, + u32 attr_usage_id, u32 report_id, + enum sensor_hub_read_flags flag +); + /** * sensor_hub_set_feature() - Feature set request * @report_id: Report id to look for -- cgit v1.2.3 From 6adc83fca74ab73abcbd3b394cf3a8fd3701db99 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 19 Feb 2015 15:35:25 -0800 Subject: HID: hid-sensor-hub: Enhance get feature report API Some hid sensor feature report can contain more than one reports. This API can now support receiving multiple values from the feature report. Also update the parameters in the users of this API. Signed-off-by: Srinivas Pandruvada Acked-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-hub.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 4c49b041922d..1db332066669 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -206,13 +206,15 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, * sensor_hub_get_feature() - Feature get request * @report_id: Report id to look for * @field_index: Field index inside a report -* @value: Place holder for return value +* @buffer_size: size of the buffer +* @buffer: buffer to copy output * * Used to get a field in feature report. For example this can get polling -* interval, sensitivity, activate/deactivate state. +* interval, sensitivity, activate/deactivate state. On success it returns +* number of bytes copied to buffer. On failure, it returns value < 0. */ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 *value); + u32 field_index, int buffer_size, void *buffer); /* hid-sensor-attributes */ -- cgit v1.2.3 From 3950e03389cfc8ee9d7131074d999b5fb6bbc2bf Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 19 Feb 2015 15:35:26 -0800 Subject: HID: hid-sensor-hub: Enhance feature report set API Current API only allows setting one offset in the field. This API is extended to set multiple offsets in the field report. Also update parameters in the users of this API. Signed-off-by: Srinivas Pandruvada Reviewed-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-hub.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 1db332066669..4a2fdbabfcf1 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -194,13 +194,14 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev, * sensor_hub_set_feature() - Feature set request * @report_id: Report id to look for * @field_index: Field index inside a report -* @value: Value to set +* @buffer_size: size of the buffer +* @buffer: buffer to use in the feature set * * Used to set a field in feature report. For example this can set polling * interval, sensitivity, activate/deactivate state. */ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, - u32 field_index, s32 value); + u32 field_index, int buffer_size, void *buffer); /** * sensor_hub_get_feature() - Feature get request -- cgit v1.2.3 From 4d3199e4ca8e6670b54dc5ee070ffd54385988e9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 22 Feb 2015 19:31:41 -0800 Subject: locking: Remove ACCESS_ONCE() usage With the new standardized functions, we can replace all ACCESS_ONCE() calls across relevant locking - this includes lockref and seqlock while at it. ACCESS_ONCE() does not work reliably on non-scalar types. For example gcc 4.6 and 4.7 might remove the volatile tag for such accesses during the SRA (scalar replacement of aggregates) step: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 Update the new calls regardless of if it is a scalar type, this is cleaner than having three alternatives. Signed-off-by: Davidlohr Bueso Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/1424662301.6539.18.camel@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/seqlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f5df8f687b4d..5f68d0a391ce 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->sequence); + ret = READ_ONCE(s->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; @@ -127,7 +127,7 @@ repeat: */ static inline unsigned raw_read_seqcount(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret; } @@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret & ~1; } -- cgit v1.2.3 From 5c9e719691eab8c5de8b1b68fc3da9f7c4470c38 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Feb 2015 17:10:03 +0100 Subject: regulator: core: Fix space before TAB Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d17e1ff7ad01..aeacd624a794 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -114,7 +114,7 @@ struct regmap; #define REGULATOR_EVENT_OVER_TEMP 0x10 #define REGULATOR_EVENT_FORCE_DISABLE 0x20 #define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 -#define REGULATOR_EVENT_DISABLE 0x80 +#define REGULATOR_EVENT_DISABLE 0x80 #define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 #define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 #define REGULATOR_EVENT_PRE_DISABLE 0x400 -- cgit v1.2.3 From 182d919b84902eece162c63ed3d476c8016b4197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Feb 2015 23:47:31 +0000 Subject: FS-Cache: Count culled objects and objects rejected due to lack of space Count the number of objects that get culled by the cache backend and the number of objects that the cache backend declines to instantiate due to lack of space in the cache. These numbers are made available through /proc/fs/fscache/stats Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 771484993ca7..c9dafdaf3347 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -371,6 +371,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ +#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ @@ -551,4 +552,15 @@ extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, const void *data, uint16_t datalen); +extern void fscache_object_retrying_stale(struct fscache_object *object); + +enum fscache_why_object_killed { + FSCACHE_OBJECT_IS_STALE, + FSCACHE_OBJECT_NO_SPACE, + FSCACHE_OBJECT_WAS_RETIRED, + FSCACHE_OBJECT_WAS_CULLED, +}; +extern void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From 0c571785813cf3294cdbb1f2fb1a9b19e11935f6 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Fri, 13 Feb 2015 08:06:47 -0800 Subject: leds: flash: remove stray include directive Avoid including v4l2-controls.h, as this is stray code from the early versions of the LED / V4L2 flash API integration patches. LED Flash class doesn't depend on V4L2 subsystem. Signed-off-by: Jacek Anaszewski Signed-off-by: Bryan Wu --- include/linux/led-class-flash.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 5ba2facd7a51..3b5b9643cea1 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -13,7 +13,6 @@ #define __LINUX_FLASH_LEDS_H_INCLUDED #include -#include struct device_node; struct led_classdev_flash; -- cgit v1.2.3 From 39bed6cbb842d8edf5a26b01122b391d36775b5e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:40 +0000 Subject: perf: Make perf_cgroup_from_task() global Move perf_cgroup_from_task() from kernel/events/ to include/linux/ along with the necessary struct definitions, so that it can be used by the PMU code. When the upcoming Intel Cache Monitoring PMU driver assigns monitoring IDs to perf events, it needs to be able to check whether any two monitoring events overlap (say, a cgroup and task event), which means we need to be able to lookup the cgroup associated with a task (if any). Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 724d3720c9b1..cae4a9481777 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,6 +53,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -547,6 +548,35 @@ struct perf_output_handle { int page; }; +#ifdef CONFIG_CGROUP_PERF + +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info __percpu *info; +}; + +/* + * Must ensure cgroup is pinned (css_get) before calling + * this function. In other words, we cannot call this function + * if there is no cgroup event for the current CPU context. + */ +static inline struct perf_cgroup * +perf_cgroup_from_task(struct task_struct *task) +{ + return container_of(task_css(task, perf_event_cgrp_id), + struct perf_cgroup, css); +} +#endif /* CONFIG_CGROUP_PERF */ + #ifdef CONFIG_PERF_EVENTS extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); -- cgit v1.2.3 From eacd3ecc34472ce3751eedfc94e44c7cc6eb6305 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:41 +0000 Subject: perf: Add ->count() function to read per-package counters For PMU drivers that record per-package counters, the ->count variable cannot be used to record an accurate aggregated value, since it's not possible to perform SMP cross-calls to cpus on other packages from the context in which we update ->count. Introduce a new optional ->count() accessor function that can be used to customize how values are collected. If a PMU driver doesn't provide a ->count() function, we fallback to the existing code. There is necessarily a window of staleness with this approach because the task that generated the counter value may not have been scheduled by the cpu recently. An alternative and more complex approach would be to use a hrtimer to periodically refresh the values from a more permissive scheduling context. So, we're trading off complexity for accuracy. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cae4a9481777..9fc9b0d31442 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -272,6 +272,11 @@ struct pmu { */ size_t task_ctx_size; + + /* + * Return the count value for a counter. + */ + u64 (*count) (struct perf_event *event); /*optional*/ }; /** @@ -770,6 +775,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, __perf_event_task_sched_out(prev, next); } +static inline u64 __perf_event_count(struct perf_event *event) +{ + return local64_read(&event->count) + atomic64_read(&event->child_count); +} + extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -- cgit v1.2.3 From 4afbb24ce5e723c8a093a6674a3c33062175078a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:44 +0000 Subject: perf/x86/intel: Add Intel Cache QoS Monitoring support Future Intel Xeon processors support a Cache QoS Monitoring feature that allows tracking of the LLC occupancy for a task or task group, i.e. the amount of data in pulled into the LLC for the task (group). Currently the PMU only supports per-cpu events. We create an event for each cpu and read out all the LLC occupancy values. Because this results in duplicate values being written out to userspace, we also export a .per-pkg event file so that the perf tools only accumulate values for one cpu per package. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9fc9b0d31442..ca5504c48f4f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -123,6 +123,13 @@ struct hw_perf_event { /* for tp_event->class */ struct list_head tp_list; }; + struct { /* intel_cqm */ + int cqm_state; + int cqm_rmid; + struct list_head cqm_events_entry; + struct list_head cqm_groups_entry; + struct list_head cqm_group_entry; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* -- cgit v1.2.3 From bfe1fcd2688f557a6b6a88f59ea7619228728bd7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:46 +0000 Subject: perf/x86/intel: Support task events with Intel CQM Add support for task events as well as system-wide events. This change has a big impact on the way that we gather LLC occupancy values in intel_cqm_event_read(). Currently, for system-wide (per-cpu) events we defer processing to userspace which knows how to discard all but one cpu result per package. Things aren't so simple for task events because we need to do the value aggregation ourselves. To do this, we defer updating the LLC occupancy value in event->count from intel_cqm_event_read() and do an SMP cross-call to read values for all packages in intel_cqm_event_count(). We need to ensure that we only do this for one task event per cache group, otherwise we'll report duplicate values. If we're a system-wide event we want to fallback to the default perf_event_count() implementation. Refactor this into a common function so that we don't duplicate the code. Also, introduce PERF_TYPE_INTEL_CQM, since we need a way to track an event's task (if the event isn't per-cpu) inside of the Intel CQM PMU driver. This task information is only availble in the upper layers of the perf infrastructure. Other perf backends stash the target task in event->hw.*target so we need to do something similar. The task is used to determine whether events should share a cache group and an RMID. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/1422038748-21397-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ca5504c48f4f..dac4c2831d82 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,6 +129,7 @@ struct hw_perf_event { struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; + struct task_struct *cqm_target; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ -- cgit v1.2.3 From 5d8a4219a0795a321606c51582898223db80e874 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 24 Feb 2015 15:33:50 +1100 Subject: power_supply core: support use of devres to register/unregister a power supply. Using devm_power_supply_register allows the unregister to happen automatically on error or final put. Signed-off-by: NeilBrown Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 096dbced02ac..f606d6b4bd56 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -278,6 +278,10 @@ extern int power_supply_register(struct device *parent, struct power_supply *psy); extern int power_supply_register_no_ws(struct device *parent, struct power_supply *psy); +extern int devm_power_supply_register(struct device *parent, + struct power_supply *psy); +extern int devm_power_supply_register_no_ws(struct device *parent, + struct power_supply *psy); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); -- cgit v1.2.3 From ee376dbdf27728a2f3d30e2ba10fa387cc4c645b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 10 Jan 2015 19:47:10 -0800 Subject: rcu: Consolidate rcu_synchronize and wakeme_after_rcu() There are currently duplicate identical definitions of the rcu_synchronize() structure and the wakeme_after_rcu() function. Thie commit therefore consolidates them. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 78097491cd99..3e6afed51051 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -195,6 +195,15 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); +/* + * Structure allowing asynchronous waiting on RCU. + */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. -- cgit v1.2.3 From e5d4ef0d731664b3fe204f4e5e87f5756e848fb1 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Sat, 17 Jan 2015 15:21:22 +0000 Subject: mfd: arizona: Add support for WM8280/WM8281 This adds support for the Wolfson Microelectronics WM8280 and WM8281 codecs. Signed-off-by: Richard Fitzgerald Signed-off-by: Charles Keepax [Lee: Minor fixup to remove potentially uninitialised variable. ] Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 910e3aa1e965..f97010576f56 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -24,6 +24,7 @@ enum arizona_type { WM5102 = 1, WM5110 = 2, WM8997 = 3, + WM8280 = 4, }; #define ARIZONA_IRQ_GP1 0 -- cgit v1.2.3 From 0d39482c3db13aae1db143d340816108dd53e443 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Feb 2015 12:24:30 -0800 Subject: rcu: Provide rcu_expedite_gp() and rcu_unexpedite_gp() Currently, expediting of normal synchronous grace-period primitives (synchronize_rcu() and friends) is controlled by the rcu_expedited() boot/sysfs parameter. This works well, but does not handle nesting. This commit therefore provides rcu_expedite_gp() to enable expediting and rcu_unexpedite_gp() to cancel a prior rcu_expedite_gp(), both of which support nesting. Reported-by: Arjan van de Ven Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 78097491cd99..57a4d1f73a00 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -48,6 +48,26 @@ extern int rcu_expedited; /* for sysctl */ +#ifdef CONFIG_TINY_RCU +/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ +{ + return false; +} + +static inline void rcu_expedite_gp(void) +{ +} + +static inline void rcu_unexpedite_gp(void) +{ +} +#else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_expedited(void); /* Internal RCU use. */ +void rcu_expedite_gp(void); +void rcu_unexpedite_gp(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ + enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, -- cgit v1.2.3 From ee42571f4381f184e2672dd34ab411e5bf5bd5e0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Feb 2015 10:51:32 -0800 Subject: rcu: Add Kconfig option to expedite grace periods during boot This commit adds a CONFIG_RCU_EXPEDITE_BOOT Kconfig parameter that emulates a very early boot rcu_expedite_gp(). A late-boot call to rcu_end_inkernel_boot() will provide the corresponding rcu_unexpedite_gp(). The late-boot call to rcu_end_inkernel_boot() should be made just before init is spawned. According to Arjan: > To show the boot time, I'm using the timestamp of the "Write protecting" > line, that's pretty much the last thing we print prior to ring 3 execution. > > A kernel with default RCU behavior (inside KVM, only virtual devices) > looks like this: > > [ 0.038724] Write protecting the kernel read-only data: 10240k > > a kernel with expedited RCU (using the command line option, so that I > don't have to recompile between measurements and thus am completely > oranges-to-oranges) > > [ 0.031768] Write protecting the kernel read-only data: 10240k > > which, in percentage, is an 18% improvement. Reported-by: Arjan van de Ven Signed-off-by: Paul E. McKenney Tested-by: Arjan van de Ven --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 57a4d1f73a00..b9f039b11d31 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -278,6 +278,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); +void rcu_end_inkernel_boot(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); -- cgit v1.2.3 From 1b84f2a4cd4a6f517a313261f6f7c8caae5696c6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 Feb 2015 12:26:22 +0100 Subject: mfd: cros_ec: Use fixed size arrays to transfer data with the EC The struct cros_ec_command will be used as an ioctl() argument for the API to control the ChromeOS EC from user-space. So the data structure has to be 64-bit safe to make it compatible between 32 and 64 avoiding the need for a compat ioctl interface. Since pointers are self-aligned to different byte boundaries, use fixed size arrays instead of pointers for transferring ingoing and outgoing data with the Embedded Controller. Also, re-arrange struct members by decreasing alignment requirements to reduce the needing padding size. Signed-off-by: Javier Martinez Canillas Acked-by: Lee Jones Tested-by: Gwendal Grignou Reviewed-by: Gwendal Grignou Signed-off-by: Olof Johansson --- include/linux/mfd/cros_ec.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 0e166b92f5b4..71675b14b5ca 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -38,20 +38,20 @@ enum { /* * @version: Command version number (often 0) * @command: Command to send (EC_CMD_...) - * @outdata: Outgoing data to EC * @outsize: Outgoing length in bytes - * @indata: Where to put the incoming data from EC * @insize: Max number of bytes to accept from EC * @result: EC's response to the command (separate from communication failure) + * @outdata: Outgoing data to EC + * @indata: Where to put the incoming data from EC */ struct cros_ec_command { uint32_t version; uint32_t command; - uint8_t *outdata; uint32_t outsize; - uint8_t *indata; uint32_t insize; uint32_t result; + uint8_t outdata[EC_PROTO2_MAX_PARAM_SIZE]; + uint8_t indata[EC_PROTO2_MAX_PARAM_SIZE]; }; /** -- cgit v1.2.3 From 05c11ac4e0712def44cccbff82ad980d9e1d1b80 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 2 Feb 2015 12:26:23 +0100 Subject: mfd: cros_ec: Add char dev and virtual dev pointers The ChromeOS Embedded Controller has to be accessed by applications. A virtual character device is used as an interface with user-space. Extend the struct cros_ec_device with the fields needed by the driver of this virtual character device. Signed-off-by: Javier Martinez Canillas Acked-by: Lee Jones Tested-by: Gwendal Grignou Reviewed-by: Gwendal Grignou Signed-off-by: Olof Johansson --- include/linux/mfd/cros_ec.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 71675b14b5ca..324a34683971 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -16,6 +16,7 @@ #ifndef __LINUX_MFD_CROS_EC_H #define __LINUX_MFD_CROS_EC_H +#include #include #include #include @@ -59,9 +60,17 @@ struct cros_ec_command { * * @ec_name: name of EC device (e.g. 'chromeos-ec') * @phys_name: name of physical comms layer (e.g. 'i2c-4') - * @dev: Device pointer + * @dev: Device pointer for physical comms device + * @vdev: Device pointer for virtual comms device + * @cdev: Character device structure for virtual comms device * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend + * @cmd_readmem: direct read of the EC memory-mapped region, if supported + * @offset is within EC_LPC_ADDR_MEMMAP region. + * @bytes: number of bytes to read. zero means "read a string" (including + * the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be read. + * Caller must ensure that the buffer is large enough for the result when + * reading a string. * * @priv: Private data * @irq: Interrupt to use @@ -90,8 +99,12 @@ struct cros_ec_device { const char *ec_name; const char *phys_name; struct device *dev; + struct device *vdev; + struct cdev cdev; bool was_wake_device; struct class *cros_class; + int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset, + unsigned int bytes, void *dest); /* These are used to implement the platform-specific interface */ void *priv; -- cgit v1.2.3 From 6232c51cb370919b116e0aea38d12aa33aae2fa9 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Thu, 26 Feb 2015 17:42:07 +0100 Subject: ARM: shmobile: r8a7778: common clock framework CPG driver Driver for the r8a7778's clocks that depend on the mode bits. Signed-off-by: Ulrich Hecht Acked-by: Laurent Pinchart Acked-by: Michael Turquette Signed-off-by: Simon Horman --- include/linux/clk/shmobile.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/shmobile.h b/include/linux/clk/shmobile.h index 9f8a14041dd5..63a8159c4e64 100644 --- a/include/linux/clk/shmobile.h +++ b/include/linux/clk/shmobile.h @@ -16,6 +16,7 @@ #include +void r8a7778_clocks_init(u32 mode); void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); -- cgit v1.2.3 From ccdaeb2b176f7db491a6f8e8b1c51f9393525f7d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 27 Feb 2015 09:58:26 +0100 Subject: at86rf230: add support for external xtal trim This patch adds support for setting the xtal trim register. Some at86rf2xx transceiver boards needs fine tuning the xtal capacitor. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/spi/at86rf230.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index cd519a11c2c6..b63fe6f5fdc8 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -22,6 +22,7 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; + u8 xtal_trim; }; #endif -- cgit v1.2.3 From 9e39dc1e563e8d390bae42ee80e1e665c18b7de2 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 13 Feb 2015 03:46:07 -0500 Subject: Documentation: Add "@" in front of private structure members. Even "private" structure members need a leading "@" in their kernel-doc; otherwise, they will be treated as new section names in the resulting manual. Signed-off-by: Robert P. J. Day Acked-by: Randy Dunlap Acked-By: Sebastian Reichel Signed-off-by: Jonathan Corbet --- include/linux/hsi/hsi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h index 3ec06300d535..5dd60c2e120f 100644 --- a/include/linux/hsi/hsi.h +++ b/include/linux/hsi/hsi.h @@ -135,9 +135,9 @@ static inline int hsi_register_board_info(struct hsi_board_info const *info, * @device: Driver model representation of the device * @tx_cfg: HSI TX configuration * @rx_cfg: HSI RX configuration - * e_handler: Callback for handling port events (RX Wake High/Low) - * pclaimed: Keeps tracks if the clients claimed its associated HSI port - * nb: Notifier block for port events + * @e_handler: Callback for handling port events (RX Wake High/Low) + * @pclaimed: Keeps tracks if the clients claimed its associated HSI port + * @nb: Notifier block for port events */ struct hsi_client { struct device device; -- cgit v1.2.3 From 6588af614e7b79294fbcd4a666a7422c0c854e80 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 26 Feb 2015 19:34:37 +0000 Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the usbnet core does not update the tx_packets statistic for drivers with FLAG_MULTI_PACKET and there is no hook in the TX completion path where they could do this. cdc_ncm and dependent drivers are bumping tx_packets stat on the transmit path while asix and sr9800 aren't updating it at all. Add a packet count in struct skb_data so these drivers can fill it in, initialise it to 1 for other drivers, and add the packet count to the tx_packets statistic on completion. Signed-off-by: Ben Hutchings Tested-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..ff3fb2bd0e90 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of these */ struct usbnet *dev; enum skb_state state; size_t length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, -- cgit v1.2.3 From 5f852eb536ad651b8734559dcf4353514cb0bea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2015 14:10:18 -0800 Subject: tcp: tso: remove tp->tso_deferred TSO relies on ability to defer sending a small amount of packets. Heuristic is to wait for future ACKS in hope to send more packets at once. Current algorithm uses a per socket tso_deferred field as a pseudo timer. This pseudo timer relies on future ACK, but there is no guarantee we receive them in time. Fix would be to use a real timer, but cost of such timer is probably too expensive for typical cases. This patch changes the logic to test the time of last transmit, because we should not add bursts of more than 1ms for any given flow. We've used this patch for about two years at Google, before FQ/pacing as it would reduce a fair amount of bursts. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a7adb411647..97dbf16f7d9d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -236,7 +236,6 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ - u32 tso_deferred; /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; -- cgit v1.2.3 From a2c83fff582ae133d9f5bb187404ea9ce4da1f96 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:42 +0100 Subject: ebpf: constify various function pointer structs We can move bpf_map_ops and bpf_verifier_ops and other structs into ro section, bpf_map_type_list and bpf_prog_type_list into read mostly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbfceb756452..78446860f796 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -32,13 +32,13 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; struct work_struct work; }; struct bpf_map_type_list { struct list_head list_node; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; enum bpf_map_type type; }; @@ -109,7 +109,7 @@ struct bpf_verifier_ops { struct bpf_prog_type_list { struct list_head list_node; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; enum bpf_prog_type type; }; @@ -121,7 +121,7 @@ struct bpf_prog_aux { atomic_t refcnt; bool is_gpl_compatible; enum bpf_prog_type prog_type; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; u32 used_map_cnt; struct bpf_prog *prog; @@ -138,8 +138,8 @@ struct bpf_prog *bpf_prog_get(u32 ufd); int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); /* verifier prototypes for helper functions called from eBPF programs */ -extern struct bpf_func_proto bpf_map_lookup_elem_proto; -extern struct bpf_func_proto bpf_map_update_elem_proto; -extern struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_elem_proto; +extern const struct bpf_func_proto bpf_map_update_elem_proto; +extern const struct bpf_func_proto bpf_map_delete_elem_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:43 +0100 Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the ELF BPF loader where instructions are being loaded that need map fixups. An initial stage loads all maps into the kernel, and later on replaces related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source register and the actual fd as immediate value. The kernel verifier recognizes this keyword and replaces the map fd with a real pointer internally. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index caac2087a4d5..5e3863d5f666 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -145,8 +145,6 @@ struct bpf_prog_aux; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 - /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) -- cgit v1.2.3 From 0fc174dea54546e2b1146e1197da1b6d4bc48107 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:44 +0100 Subject: ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs Socket filter code and other subsystems with upcoming eBPF support should not need to deal with the fact that we have CONFIG_BPF_SYSCALL defined or not. Having the bpf syscall as a config option is a nice thing and I'd expect it to stay that way for expert users (I presume one day the default setting of it might change, though), but code making use of it should not care if it's actually enabled or not. Instead, hide this via header files and let the rest deal with it. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 78446860f796..9c458144cdb4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -113,8 +113,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -void bpf_register_prog_type(struct bpf_prog_type_list *tl); - struct bpf_prog; struct bpf_prog_aux { @@ -129,11 +127,25 @@ struct bpf_prog_aux { }; #ifdef CONFIG_BPF_SYSCALL +void bpf_register_prog_type(struct bpf_prog_type_list *tl); + void bpf_prog_put(struct bpf_prog *prog); +struct bpf_prog *bpf_prog_get(u32 ufd); #else -static inline void bpf_prog_put(struct bpf_prog *prog) {} +static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +{ +} + +static inline struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void bpf_prog_put(struct bpf_prog *prog) +{ +} #endif -struct bpf_prog *bpf_prog_get(u32 ufd); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); -- cgit v1.2.3 From 24701ecea76b0b93bd9667486934ec310825f558 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:47 +0100 Subject: ebpf: move read-only fields to bpf_prog and shrink bpf_prog_aux is_gpl_compatible and prog_type should be moved directly into bpf_prog as they stay immutable during bpf_prog's lifetime, are core attributes and they can be locked as read-only later on via bpf_prog_select_runtime(). With a bit of rearranging, this also allows us to shrink bpf_prog_aux to exactly 1 cacheline. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +--- include/linux/filter.h | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c458144cdb4..a1a7ff2df328 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,11 +117,9 @@ struct bpf_prog; struct bpf_prog_aux { atomic_t refcnt; - bool is_gpl_compatible; - enum bpf_prog_type prog_type; + u32 used_map_cnt; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; - u32 used_map_cnt; struct bpf_prog *prog; struct work_struct work; }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5e3863d5f666..9ee8c67ea249 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -308,9 +308,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ bool jited; /* Is our filter JIT'ed? */ + bool gpl_compatible; /* Is our filter GPL compatible? */ u32 len; /* Number of filter blocks */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ + enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); /* Instructions for interpreter */ -- cgit v1.2.3 From bc63b6f634d91a0b2a7f3ba4f266e55fec369de3 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 27 Feb 2015 11:25:52 -0800 Subject: Drivers: hv: vmbus: rename channel work queues All channel work queues are named 'hv_vmbus_ctl', this makes them indistinguishable in ps output and makes it hard to link to the corresponding vmbus device. Rename them to hv_vmbus_ctl/N and make vmbus device names match, e.g. now vmbus_1 device is served by hv_vmbus_ctl/1 work queue. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5a2ba674795e..26a32b771ee5 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -646,6 +646,9 @@ struct hv_input_signal_event_buffer { }; struct vmbus_channel { + /* Unique channel id */ + int id; + struct list_head listentry; struct hv_device *device_obj; -- cgit v1.2.3 From 04653a009a63d6a91c60a5449ea97e3ed5e1dc29 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 27 Feb 2015 11:26:05 -0800 Subject: Drivers: hv: vmbus: Add support for the NetworkDirect GUID NetworkDirect is a service that supports guest RDMA. Define the GUID for this service. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 26a32b771ee5..7d976ac01fac 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1109,6 +1109,16 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver); 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \ } +/* + * NetworkDirect. This is the guest RDMA service. + * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501} + */ +#define HV_ND_GUID \ + .guid = { \ + 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \ + 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ + } + /* * Common header for Hyper-V ICs */ -- cgit v1.2.3 From ed6cfcc5fdf2ebca320b6f74c836e555e18216e1 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 28 Feb 2015 11:18:17 -0800 Subject: Drivers: hv: vmbus: Introduce a function to remove a rescinded offer In response to a rescind message, we need to remove the channel and the corresponding device. Cleanup this code path by factoring out the code to remove a channel. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 7d976ac01fac..dd92a854c700 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1226,6 +1226,7 @@ void hv_kvp_onchannelcallback(void *); int hv_vss_init(struct hv_util_service *); void hv_vss_deinit(void); void hv_vss_onchannelcallback(void *); +void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); extern struct resource hyperv_mmio; -- cgit v1.2.3 From a13e8bbe851a96a0e78c2bd599bc34082fa697cd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 28 Feb 2015 11:39:02 -0800 Subject: Drivers: hv: vmbus: Use a round-robin algorithm for picking the outgoing channel The current algorithm for picking an outgoing channel was not distributing the load well. Implement a simple round-robin scheme to ensure good distribution of the outgoing traffic. Signed-off-by: K. Y. Srinivasan Reviewed-by: Long Li Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index dd92a854c700..1ca582457076 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -761,6 +761,9 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + + int num_sc; + int next_oc; }; static inline void set_channel_read_state(struct vmbus_channel *c, bool state) -- cgit v1.2.3 From 87e93d61708fe2c44875d1ecdb174aad070dbd08 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 28 Feb 2015 11:39:03 -0800 Subject: Drivers: hv: vmbus: Suport an API to send pagebuffers with additional control Implement an API for sending pagebuffers that gives more control to the client in terms of setting the vmbus flags as well as deciding when to notify the host. This will be useful for enabling batch processing. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 1ca582457076..86e1a7a46af3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -874,6 +874,15 @@ extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, u32 bufferlen, u64 requestid); +extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, + struct hv_page_buffer pagebuffers[], + u32 pagecount, + void *buffer, + u32 bufferlen, + u64 requestid, + u32 flags, + bool kick_q); + extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct hv_multipage_buffer *mpb, void *buffer, -- cgit v1.2.3 From e9395e3f8952110bda60b54ad03ec52c6e9c7dbd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 28 Feb 2015 11:39:04 -0800 Subject: Drivers: hv: vmbus: Suport an API to send packet with additional control Implement an API that gives additional control on the what VMBUS flags will be set as well as if the host needs to be signalled. This API will be useful for clients that want to batch up requests to the host. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 86e1a7a46af3..80e444bfc9dc 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -867,6 +867,14 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); +extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel, + void *buffer, + u32 bufferLen, + u64 requestid, + enum vmbus_packet_type type, + u32 flags, + bool kick_q); + extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], u32 pagecount, -- cgit v1.2.3 From 287f3a943fef58c5c73e42545169443be379222f Mon Sep 17 00:00:00 2001 From: Simon Farnsworth Date: Sun, 1 Mar 2015 10:54:39 +0000 Subject: pppoe: Use workqueue to die properly when a PADT is received When a PADT frame is received, the socket may not be in a good state to close down the PPP interface. The current implementation handles this by simply blocking all further PPP traffic, and hoping that the lack of traffic will trigger the user to investigate. Use schedule_work to get to a process context from which we clear down the PPP interface, in a fashion analogous to hangup on a TTY-based PPP interface. This causes pppd to disconnect immediately, and allows tools to take immediate corrective action. Note that pppd's rp_pppoe.so plugin has code in it to disable the session when it disconnects; however, as a consequence of this patch, the session is already disabled before rp_pppoe.so is asked to disable the session. The result is a harmless error message: Failed to disconnect PPPoE socket: 114 Operation already in progress This message is safe to ignore, as long as the error is 114 Operation already in progress; in that specific case, it means that the PPPoE session has already been disabled before pppd tried to disable it. Signed-off-by: Simon Farnsworth Tested-by: Dan Williams Tested-by: Christoph Schulz Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index aff7ad8a4ea3..66a7d7600f43 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -19,6 +19,7 @@ #include #include #include +#include #include static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb) @@ -32,6 +33,7 @@ struct pppoe_opt { struct pppoe_addr pa; /* what this socket is bound to*/ struct sockaddr_pppox relay; /* what socket data will be relayed to (PPPoE relaying) */ + struct work_struct padt_work;/* Work item for handling PADT */ }; struct pptp_opt { -- cgit v1.2.3 From 744d5a3e9fe2690dd85d9991dbb078301694658b Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 1 Mar 2015 14:58:31 +0200 Subject: net: move skb->dropcount to skb->cb[] Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)") unionized skb->mark and skb->dropcount in order to allow recording of the socket drop count while maintaining struct sk_buff size. skb->dropcount was introduced since there was no available room in skb->cb[] in packet sockets. However, its introduction led to the inability to export skb->mark, or any other aliased field to userspace if so desired. Moving the dropcount metric to skb->cb[] eliminates this problem at the expense of 4 bytes less in skb->cb[] for protocol families using it. Signed-off-by: Eyal Birger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d898b32dedcc..bba1330757c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark - * @dropcount: total number of sk_receive_queue overflows * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -641,7 +640,6 @@ struct sk_buff { #endif union { __u32 mark; - __u32 dropcount; __u32 reserved_tailroom; }; -- cgit v1.2.3 From 5f80e19081e233698c8ea77ed2dd84a66f49fc54 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Thu, 12 Feb 2015 14:01:25 +0800 Subject: ARM: imx6q: Add GPR3 MIPI muxing control register field shift bits definition This patch adds a macro to define the GPR3 MIPI muxing control register field shift bits. Signed-off-by: Liu Ying Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index c877cad61a13..d16f4c82c568 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -207,6 +207,7 @@ #define IMX6Q_GPR3_LVDS0_MUX_CTL_IPU1_DI1 (0x1 << 6) #define IMX6Q_GPR3_LVDS0_MUX_CTL_IPU2_DI0 (0x2 << 6) #define IMX6Q_GPR3_LVDS0_MUX_CTL_IPU2_DI1 (0x3 << 6) +#define IMX6Q_GPR3_MIPI_MUX_CTL_SHIFT 4 #define IMX6Q_GPR3_MIPI_MUX_CTL_MASK (0x3 << 4) #define IMX6Q_GPR3_MIPI_MUX_CTL_IPU1_DI0 (0x0 << 4) #define IMX6Q_GPR3_MIPI_MUX_CTL_IPU1_DI1 (0x1 << 4) -- cgit v1.2.3 From 4186721d02b71ae943e60bbf50d3488fd5fd6adb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:47 +0100 Subject: bcma: add helpers bringing PCIe hosted bus up / down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bringing PCIe hosted bus up requires operating on host-related core. Since we plan to support PCIe Gen 2 devices we should provide a helper picking the correct one (PCIE or PCIE2). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 3 +++ include/linux/bcma/bcma_driver_pci.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 994739da827f..037620b3f113 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,6 +434,9 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +extern void bcma_host_pci_up(struct bcma_bus *bus); +extern void bcma_host_pci_down(struct bcma_bus *bus); + extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); extern int bcma_core_enable(struct bcma_device *core, u32 flags); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3f809ae372c4..23af893e9b86 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -242,8 +242,6 @@ extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); -extern void bcma_core_pci_up(struct bcma_bus *bus); -extern void bcma_core_pci_down(struct bcma_bus *bus); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.2.3 From 5b6ff664c8959d715e785b9465b042407a5d87a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:48 +0100 Subject: bcma: change IRQ control function to accept bus as an argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't operate on PCI core, but PCI host device, so there is no point of passing core related struct. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 23af893e9b86..6b8bca67851f 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -240,7 +240,7 @@ struct bcma_drv_pci { extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); -extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, +extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); -- cgit v1.2.3 From 804e27dee49e20c0addd1b7276654220cc3768ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:49 +0100 Subject: bcma: support bringing up bus hosted on PCIe Gen 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pcie2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index 5988b05781c3..d8c43294c527 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -143,6 +143,8 @@ struct bcma_drv_pcie2 { struct bcma_device *core; + + u16 reqsize; }; #define pcie2_read16(pcie2, offset) bcma_read16((pcie2)->core, offset) -- cgit v1.2.3 From 559addc25b00ff3a40eff03a0b3873c2b6d726f8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 4 Feb 2015 06:07:30 -0300 Subject: [media] fixp-arith: replace sin/cos table by a better precision one The cos table used at fixp-arith.h has only 8 bits of precision. That causes problems if it is reused on other drivers. As some media drivers require a higher precision sin/cos implementation, replace the current implementation by one that will provide 32 bits precision. The values generated by the new implementation matches the 32 bit precision of glibc's sin for an angle measured in integer degrees. It also provides support for fractional angles via linear interpolation. On experimental calculus, when used a table with a 0.001 degree angle, the maximum error for sin is 0.000038, which is likely good enough for practical purposes. There are some logic there that seems to be specific to the usage inside ff-memless.c. Move those logic to there, as they're not needed elsewhere. Cc: Hans de Goede Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Prashant Laddha Signed-off-by: Hans Verkuil Acked-by: Dmitry Torokhov Signed-off-by: Mauro Carvalho Chehab --- include/linux/fixp-arith.h | 145 +++++++++++++++++++++++++++++++++------------ 1 file changed, 107 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fixp-arith.h b/include/linux/fixp-arith.h index 3089d7382325..d4686fe1cac7 100644 --- a/include/linux/fixp-arith.h +++ b/include/linux/fixp-arith.h @@ -1,6 +1,8 @@ #ifndef _FIXP_ARITH_H #define _FIXP_ARITH_H +#include + /* * Simplistic fixed-point arithmetics. * Hmm, I'm probably duplicating some code :( @@ -29,59 +31,126 @@ #include -/* The type representing fixed-point values */ -typedef s16 fixp_t; +static const s32 sin_table[] = { + 0x00000000, 0x023be165, 0x04779632, 0x06b2f1d2, 0x08edc7b6, 0x0b27eb5c, + 0x0d61304d, 0x0f996a26, 0x11d06c96, 0x14060b67, 0x163a1a7d, 0x186c6ddd, + 0x1a9cd9ac, 0x1ccb3236, 0x1ef74bf2, 0x2120fb82, 0x234815ba, 0x256c6f9e, + 0x278dde6e, 0x29ac379f, 0x2bc750e8, 0x2ddf003f, 0x2ff31bdd, 0x32037a44, + 0x340ff241, 0x36185aee, 0x381c8bb5, 0x3a1c5c56, 0x3c17a4e7, 0x3e0e3ddb, + 0x3fffffff, 0x41ecc483, 0x43d464fa, 0x45b6bb5d, 0x4793a20f, 0x496af3e1, + 0x4b3c8c11, 0x4d084650, 0x4ecdfec6, 0x508d9210, 0x5246dd48, 0x53f9be04, + 0x55a6125a, 0x574bb8e5, 0x58ea90c2, 0x5a827999, 0x5c135399, 0x5d9cff82, + 0x5f1f5ea0, 0x609a52d1, 0x620dbe8a, 0x637984d3, 0x64dd894f, 0x6639b039, + 0x678dde6d, 0x68d9f963, 0x6a1de735, 0x6b598ea1, 0x6c8cd70a, 0x6db7a879, + 0x6ed9eba0, 0x6ff389de, 0x71046d3c, 0x720c8074, 0x730baeec, 0x7401e4bf, + 0x74ef0ebb, 0x75d31a5f, 0x76adf5e5, 0x777f903b, 0x7847d908, 0x7906c0af, + 0x79bc384c, 0x7a6831b8, 0x7b0a9f8c, 0x7ba3751c, 0x7c32a67c, 0x7cb82884, + 0x7d33f0c8, 0x7da5f5a3, 0x7e0e2e31, 0x7e6c924f, 0x7ec11aa3, 0x7f0bc095, + 0x7f4c7e52, 0x7f834ecf, 0x7fb02dc4, 0x7fd317b3, 0x7fec09e1, 0x7ffb025e, + 0x7fffffff +}; -#define FRAC_N 8 -#define FRAC_MASK ((1< 180) { + negative = true; + degrees -= 180; + } + if (degrees > 90) + degrees = 180 - degrees; + ret = sin_table[degrees]; -/* a: 123 -> 123.0 */ -static inline fixp_t fixp_new(s16 a) -{ - return a< -1.0 - 0x8000 -> 1.0 - 0x0000 -> 0.0 -*/ -static inline fixp_t fixp_new16(s16 a) +/** + * fixp_sin32() returns the sin of an angle in degrees + * + * @degrees: angle, in degrees. The angle can be positive or negative + * + * The returned value ranges from -0x7fffffff to +0x7fffffff. + */ +static inline s32 fixp_sin32(int degrees) { - return ((s32)a)>>(16-FRAC_N); + degrees = (degrees % 360 + 360) % 360; + + return __fixp_sin32(degrees); } -static inline fixp_t fixp_cos(unsigned int degrees) +/* cos(x) = sin(x + 90 degrees) */ +#define fixp_cos32(v) fixp_sin32((v) + 90) + +/* + * 16 bits variants + * + * The returned value ranges from -0x7fff to 0x7fff + */ + +#define fixp_sin16(v) (fixp_sin32(v) >> 16) +#define fixp_cos16(v) (fixp_cos32(v) >> 16) + +/** + * fixp_sin32_rad() - calculates the sin of an angle in radians + * + * @radians: angle, in radians + * @twopi: value to be used for 2*pi + * + * Provides a variant for the cases where just 360 + * values is not enough. This function uses linear + * interpolation to a wider range of values given by + * twopi var. + * + * Experimental tests gave a maximum difference of + * 0.000038 between the value calculated by sin() and + * the one produced by this function, when twopi is + * equal to 360000. That seems to be enough precision + * for practical purposes. + * + * Please notice that two high numbers for twopi could cause + * overflows, so the routine will not allow values of twopi + * bigger than 1^18. + */ +static inline s32 fixp_sin32_rad(u32 radians, u32 twopi) { - int quadrant = (degrees / 90) & 3; - unsigned int i = degrees % 90; + int degrees; + s32 v1, v2, dx, dy; + s64 tmp; - if (quadrant == 1 || quadrant == 3) - i = 90 - i; + /* + * Avoid too large values for twopi, as we don't want overflows. + */ + BUG_ON(twopi > 1 << 18); - i >>= 1; + degrees = (radians * 360) / twopi; + tmp = radians - (degrees * twopi) / 360; - return (quadrant == 1 || quadrant == 2)? -cos_table[i] : cos_table[i]; -} + degrees = (degrees % 360 + 360) % 360; + v1 = __fixp_sin32(degrees); -static inline fixp_t fixp_sin(unsigned int degrees) -{ - return -fixp_cos(degrees + 90); -} + v2 = fixp_sin32(degrees + 1); -static inline fixp_t fixp_mult(fixp_t a, fixp_t b) -{ - return ((s32)(a*b))>>FRAC_N; + dx = twopi / 360; + dy = v2 - v1; + + tmp *= dy; + + return v1 + div_s64(tmp, dx); } +/* cos(x) = sin(x + pi/2 radians) */ + +#define fixp_cos32_rad(rad, twopi) \ + fixp_sin32_rad(rad + twopi / 4, twopi) + #endif -- cgit v1.2.3 From 1b784140474e4fc94281a49e96c67d29df0efbde Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 2 Mar 2015 15:37:48 +0800 Subject: net: Remove iocb argument from sendmsg and recvmsg After TIPC doesn't depend on iocb argument in its internal implementations of sendmsg() and recvmsg() hooks defined in proto structure, no any user is using iocb argument in them at all now. Then we can drop the redundant iocb argument completely from kinds of implementations of both sendmsg() and recvmsg() in the entire networking stack. Cc: Christoph Hellwig Suggested-by: Al Viro Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- include/linux/net.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 17d83393afcc..e74114bcca68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ struct socket { struct vm_area_struct; struct page; -struct kiocb; struct sockaddr; struct msghdr; struct module; @@ -162,8 +161,8 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif - int (*sendmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len); + int (*sendmsg) (struct socket *sock, struct msghdr *m, + size_t total_len); /* Notes for implementing recvmsg: * =============================== * msg->msg_namelen should get updated by the recvmsg handlers @@ -172,9 +171,8 @@ struct proto_ops { * handlers can assume that msg.msg_name is either NULL or has * a minimum size of sizeof(struct sockaddr_storage). */ - int (*recvmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags); + int (*recvmsg) (struct socket *sock, struct msghdr *m, + size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, -- cgit v1.2.3 From 61e021f3b86cbbcc04cbe8ac7b7da2b8c94b5e8e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 2 Mar 2015 15:21:55 +0100 Subject: ebpf: move CONFIG_BPF_SYSCALL-only function declarations Masami noted that it would be better to hide the remaining CONFIG_BPF_SYSCALL-only function declarations within the BPF header ifdef, w/o else path dummy alternatives since these functions are not supposed to have a user outside of CONFIG_BPF_SYSCALL. Suggested-by: Masami Hiramatsu Reference: http://article.gmane.org/gmane.linux.kernel.api/8658 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a1a7ff2df328..a884f5a2c503 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -42,10 +42,6 @@ struct bpf_map_type_list { enum bpf_map_type type; }; -void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_map_put(struct bpf_map *map); -struct bpf_map *bpf_map_get(struct fd f); - /* function argument constraints */ enum bpf_arg_type { ARG_ANYTHING = 0, /* any argument is ok */ @@ -126,9 +122,16 @@ struct bpf_prog_aux { #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); +void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_prog_put(struct bpf_prog *prog); struct bpf_prog *bpf_prog_get(u32 ufd); +void bpf_prog_put(struct bpf_prog *prog); + +struct bpf_map *bpf_map_get(struct fd f); +void bpf_map_put(struct bpf_map *map); + +/* verify correctness of eBPF program */ +int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { @@ -142,10 +145,7 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } -#endif - -/* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +#endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; -- cgit v1.2.3 From d476059e77d1af48453a58f9de1e36f2eaff6450 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 2 Mar 2015 00:11:09 -0600 Subject: net: Kill dev_rebuild_header Now that there are no more users kill dev_rebuild_header and all of it's implementations. This is long overdue. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 - include/linux/netdevice.h | 12 +----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 1d869d185a0d..606563ef8a72 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -35,7 +35,6 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_rebuild_header(struct sk_buff *skb); int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5897b4ea5a3f..2007f3b44d05 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,7 +261,6 @@ struct header_ops { unsigned short type, const void *daddr, const void *saddr, unsigned int len); int (*parse)(const struct sk_buff *skb, unsigned char *haddr); - int (*rebuild)(struct sk_buff *skb); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -1346,7 +1345,7 @@ enum netdev_priv_flags { * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations * @fwd_ops: Management operations - * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) @@ -2399,15 +2398,6 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } -static inline int dev_rebuild_header(struct sk_buff *skb) -{ - const struct net_device *dev = skb->dev; - - if (!dev->header_ops || !dev->header_ops->rebuild) - return 0; - return dev->header_ops->rebuild(skb); -} - typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3 From b10848e6f9fa7638fc0713695a12c0735ffb52b7 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Wed, 7 Jan 2015 11:19:36 +0530 Subject: mfd: ti_am335x_tscadc: Remove unwanted reg_se_cache save In one shot mode, sequencer automatically disables all enabled steps at the end of each cycle. (both ADC steps and TSC steps) Hence these steps need not be saved in reg_se_cache for clearing these steps at a later stage. Also, when ADC wakes up Sequencer should not be busy executing any of the config steps except for the charge step. Previously charge step was 1 ADC clock cycle and hence it was ignored. TSC steps are always disabled at the end of each conversion cycle, hence there is no need to explicitly disable TSC steps by writing 0 to REG_SE. Signed-off-by: Vignesh R Signed-off-by: Lee Jones --- include/linux/mfd/ti_am335x_tscadc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 3f4e994ace2b..1fd50dcfe47c 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -128,6 +128,7 @@ /* Sequencer Status */ #define SEQ_STATUS BIT(5) +#define CHARGE_STEP 0x11 #define ADC_CLK 3000000 #define TOTAL_STEPS 16 -- cgit v1.2.3 From 84f00b1b9631319361f6c36e2f5a8e833d09af5b Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:08 +0800 Subject: mfd: rtsx: Replace TAB by SPC after #define Re-format coding-style, using uniform SPC after "#define" keyword instead of mixing using TAB and SPC. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 254 +++++++++++++++++++++---------------------- 1 file changed, 127 insertions(+), 127 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 0c12628e91c6..a9c2a14fd521 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -175,9 +175,9 @@ /* CARD_SHARE_MODE */ #define CARD_SHARE_MASK 0x0F #define CARD_SHARE_MULTI_LUN 0x00 -#define CARD_SHARE_NORMAL 0x00 -#define CARD_SHARE_48_SD 0x04 -#define CARD_SHARE_48_MS 0x08 +#define CARD_SHARE_NORMAL 0x00 +#define CARD_SHARE_48_SD 0x04 +#define CARD_SHARE_48_MS 0x08 /* CARD_SHARE_MODE for barossa */ #define CARD_SHARE_BAROSSA_SD 0x01 #define CARD_SHARE_BAROSSA_MS 0x02 @@ -249,76 +249,76 @@ #define CD_AUTO_DISABLE 0x40 /* SD_STAT1 */ -#define SD_CRC7_ERR 0x80 -#define SD_CRC16_ERR 0x40 -#define SD_CRC_WRITE_ERR 0x20 -#define SD_CRC_WRITE_ERR_MASK 0x1C -#define GET_CRC_TIME_OUT 0x02 -#define SD_TUNING_COMPARE_ERR 0x01 +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 /* SD_STAT2 */ -#define SD_RSP_80CLK_TIMEOUT 0x01 +#define SD_RSP_80CLK_TIMEOUT 0x01 /* SD_BUS_STAT */ -#define SD_CLK_TOGGLE_EN 0x80 -#define SD_CLK_FORCE_STOP 0x40 -#define SD_DAT3_STATUS 0x10 -#define SD_DAT2_STATUS 0x08 -#define SD_DAT1_STATUS 0x04 -#define SD_DAT0_STATUS 0x02 -#define SD_CMD_STATUS 0x01 +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 /* SD_PAD_CTL */ -#define SD_IO_USING_1V8 0x80 -#define SD_IO_USING_3V3 0x7F -#define TYPE_A_DRIVING 0x00 -#define TYPE_B_DRIVING 0x01 -#define TYPE_C_DRIVING 0x02 -#define TYPE_D_DRIVING 0x03 +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 /* SD_SAMPLE_POINT_CTL */ -#define DDR_FIX_RX_DAT 0x00 -#define DDR_VAR_RX_DAT 0x80 -#define DDR_FIX_RX_DAT_EDGE 0x00 -#define DDR_FIX_RX_DAT_14_DELAY 0x40 -#define DDR_FIX_RX_CMD 0x00 -#define DDR_VAR_RX_CMD 0x20 -#define DDR_FIX_RX_CMD_POS_EDGE 0x00 -#define DDR_FIX_RX_CMD_14_DELAY 0x10 -#define SD20_RX_POS_EDGE 0x00 -#define SD20_RX_14_DELAY 0x08 +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 #define SD20_RX_SEL_MASK 0x08 /* SD_PUSH_POINT_CTL */ -#define DDR_FIX_TX_CMD_DAT 0x00 -#define DDR_VAR_TX_CMD_DAT 0x80 -#define DDR_FIX_TX_DAT_14_TSU 0x00 -#define DDR_FIX_TX_DAT_12_TSU 0x40 -#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 -#define DDR_FIX_TX_CMD_14_AHEAD 0x20 -#define SD20_TX_NEG_EDGE 0x00 -#define SD20_TX_14_AHEAD 0x10 +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 #define SD20_TX_SEL_MASK 0x10 -#define DDR_VAR_SDCLK_POL_SWAP 0x01 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 /* SD_TRANSFER */ -#define SD_TRANSFER_START 0x80 -#define SD_TRANSFER_END 0x40 +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 #define SD_STAT_IDLE 0x20 -#define SD_TRANSFER_ERR 0x10 +#define SD_TRANSFER_ERR 0x10 /* SD Transfer Mode definition */ -#define SD_TM_NORMAL_WRITE 0x00 -#define SD_TM_AUTO_WRITE_3 0x01 -#define SD_TM_AUTO_WRITE_4 0x02 -#define SD_TM_AUTO_READ_3 0x05 -#define SD_TM_AUTO_READ_4 0x06 -#define SD_TM_CMD_RSP 0x08 -#define SD_TM_AUTO_WRITE_1 0x09 -#define SD_TM_AUTO_WRITE_2 0x0A -#define SD_TM_NORMAL_READ 0x0C -#define SD_TM_AUTO_READ_1 0x0D -#define SD_TM_AUTO_READ_2 0x0E -#define SD_TM_AUTO_TUNING 0x0F +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F /* SD_VPTX_CTL / SD_VPRX_CTL */ #define PHASE_CHANGE 0x80 @@ -332,15 +332,15 @@ /* SD Configure 1 Register */ #define SD_CLK_DIVIDE_0 0x00 -#define SD_CLK_DIVIDE_256 0xC0 -#define SD_CLK_DIVIDE_128 0x80 -#define SD_BUS_WIDTH_1BIT 0x00 -#define SD_BUS_WIDTH_4BIT 0x01 -#define SD_BUS_WIDTH_8BIT 0x02 -#define SD_ASYNC_FIFO_NOT_RST 0x10 -#define SD_20_MODE 0x00 -#define SD_DDR_MODE 0x04 -#define SD_30_MODE 0x08 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_NOT_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 #define SD_CLK_DIVIDE_MASK 0xC0 @@ -415,71 +415,71 @@ #define CLK_DIV_8 0x04 /* MS_CFG */ -#define SAMPLE_TIME_RISING 0x00 -#define SAMPLE_TIME_FALLING 0x80 -#define PUSH_TIME_DEFAULT 0x00 -#define PUSH_TIME_ODD 0x40 -#define NO_EXTEND_TOGGLE 0x00 -#define EXTEND_TOGGLE_CHK 0x20 -#define MS_BUS_WIDTH_1 0x00 -#define MS_BUS_WIDTH_4 0x10 -#define MS_BUS_WIDTH_8 0x18 -#define MS_2K_SECTOR_MODE 0x04 -#define MS_512_SECTOR_MODE 0x00 -#define MS_TOGGLE_TIMEOUT_EN 0x00 -#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 #define MS_NO_CHECK_INT 0x02 /* MS_TRANS_CFG */ -#define WAIT_INT 0x80 -#define NO_WAIT_INT 0x00 -#define NO_AUTO_READ_INT_REG 0x00 -#define AUTO_READ_INT_REG 0x40 -#define MS_CRC16_ERR 0x20 -#define MS_RDY_TIMEOUT 0x10 -#define MS_INT_CMDNK 0x08 -#define MS_INT_BREQ 0x04 -#define MS_INT_ERR 0x02 -#define MS_INT_CED 0x01 +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 /* MS_TRANSFER */ -#define MS_TRANSFER_START 0x80 -#define MS_TRANSFER_END 0x40 -#define MS_TRANSFER_ERR 0x20 -#define MS_BS_STATE 0x10 -#define MS_TM_READ_BYTES 0x00 -#define MS_TM_NORMAL_READ 0x01 -#define MS_TM_WRITE_BYTES 0x04 -#define MS_TM_NORMAL_WRITE 0x05 -#define MS_TM_AUTO_READ 0x08 -#define MS_TM_AUTO_WRITE 0x0C +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C /* SD Configure 2 Register */ -#define SD_CALCULATE_CRC7 0x00 -#define SD_NO_CALCULATE_CRC7 0x80 -#define SD_CHECK_CRC16 0x00 -#define SD_NO_CHECK_CRC16 0x40 +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 #define SD_NO_CHECK_WAIT_CRC_TO 0x20 -#define SD_WAIT_BUSY_END 0x08 -#define SD_NO_WAIT_BUSY_END 0x00 -#define SD_CHECK_CRC7 0x00 -#define SD_NO_CHECK_CRC7 0x04 -#define SD_RSP_LEN_0 0x00 -#define SD_RSP_LEN_6 0x01 -#define SD_RSP_LEN_17 0x02 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 /* SD/MMC Response Type Definition */ -#define SD_RSP_TYPE_R0 0x04 -#define SD_RSP_TYPE_R1 0x01 -#define SD_RSP_TYPE_R1b 0x09 -#define SD_RSP_TYPE_R2 0x02 -#define SD_RSP_TYPE_R3 0x05 -#define SD_RSP_TYPE_R4 0x05 -#define SD_RSP_TYPE_R5 0x01 -#define SD_RSP_TYPE_R6 0x01 -#define SD_RSP_TYPE_R7 0x01 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 /* SD_CONFIGURE3 */ -#define SD_RSP_80CLK_TIMEOUT_EN 0x01 +#define SD_RSP_80CLK_TIMEOUT_EN 0x01 /* Card Transfer Reset Register */ #define SPI_STOP 0x01 @@ -574,13 +574,13 @@ #define SRCTL 0xFC13 -#define DCM_DRP_CTL 0xFC23 -#define DCM_DRP_TRIG 0xFC24 -#define DCM_DRP_CFG 0xFC25 -#define DCM_DRP_WR_DATA_L 0xFC26 -#define DCM_DRP_WR_DATA_H 0xFC27 -#define DCM_DRP_RD_DATA_L 0xFC28 -#define DCM_DRP_RD_DATA_H 0xFC29 +#define DCM_DRP_CTL 0xFC23 +#define DCM_DRP_TRIG 0xFC24 +#define DCM_DRP_CFG 0xFC25 +#define DCM_DRP_WR_DATA_L 0xFC26 +#define DCM_DRP_WR_DATA_H 0xFC27 +#define DCM_DRP_RD_DATA_L 0xFC28 +#define DCM_DRP_RD_DATA_H 0xFC29 #define SD_VPCLK0_CTL 0xFC2A #define SD_VPCLK1_CTL 0xFC2B #define SD_DCMPS0_CTL 0xFC2C -- cgit v1.2.3 From ada71f5588320e7a5c7166cb7c1c8c40cb866ac4 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:09 +0800 Subject: mfd: rtsx: Place register address and values togather It is more readable to place register address and values define togather. The values define add two leading space indicate belong to the register address defined above. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 836 +++++++++++++++++++------------------------ 1 file changed, 369 insertions(+), 467 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a9c2a14fd521..e81f2bbfcda0 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -28,74 +28,72 @@ #define MAX_RW_REG_CNT 1024 -/* PCI Operation Register Address */ #define RTSX_HCBAR 0x00 #define RTSX_HCBCTLR 0x04 +#define STOP_CMD (0x01 << 28) +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + #define RTSX_HDBAR 0x08 +#define SG_INT 0x04 +#define SG_END 0x02 +#define SG_VALID 0x01 +#define SG_NO_OP 0x00 +#define SG_TRANS_DATA (0x02 << 4) +#define SG_LINK_DESC (0x03 << 4) #define RTSX_HDBCTLR 0x0C +#define SDMA_MODE 0x00 +#define ADMA_MODE (0x02 << 26) +#define STOP_DMA (0x01 << 28) +#define TRIG_DMA (0x01 << 31) + #define RTSX_HAIMR 0x10 -#define RTSX_BIPR 0x14 -#define RTSX_BIER 0x18 +#define HAIMR_TRANS_START (0x01 << 31) +#define HAIMR_READ 0x00 +#define HAIMR_WRITE (0x01 << 30) +#define HAIMR_READ_START (HAIMR_TRANS_START | HAIMR_READ) +#define HAIMR_WRITE_START (HAIMR_TRANS_START | HAIMR_WRITE) +#define HAIMR_TRANS_END (HAIMR_TRANS_START) -/* Host command buffer control register */ -#define STOP_CMD (0x01 << 28) - -/* Host data buffer control register */ -#define SDMA_MODE 0x00 -#define ADMA_MODE (0x02 << 26) -#define STOP_DMA (0x01 << 28) -#define TRIG_DMA (0x01 << 31) - -/* Host access internal memory register */ -#define HAIMR_TRANS_START (0x01 << 31) -#define HAIMR_READ 0x00 -#define HAIMR_WRITE (0x01 << 30) -#define HAIMR_READ_START (HAIMR_TRANS_START | HAIMR_READ) -#define HAIMR_WRITE_START (HAIMR_TRANS_START | HAIMR_WRITE) -#define HAIMR_TRANS_END (HAIMR_TRANS_START) - -/* Bus interrupt pending register */ -#define CMD_DONE_INT (1 << 31) -#define DATA_DONE_INT (1 << 30) -#define TRANS_OK_INT (1 << 29) -#define TRANS_FAIL_INT (1 << 28) -#define XD_INT (1 << 27) -#define MS_INT (1 << 26) -#define SD_INT (1 << 25) -#define GPIO0_INT (1 << 24) -#define OC_INT (1 << 23) -#define SD_WRITE_PROTECT (1 << 19) -#define XD_EXIST (1 << 18) -#define MS_EXIST (1 << 17) -#define SD_EXIST (1 << 16) -#define DELINK_INT GPIO0_INT -#define MS_OC_INT (1 << 23) -#define SD_OC_INT (1 << 22) +#define RTSX_BIPR 0x14 +#define CMD_DONE_INT (1 << 31) +#define DATA_DONE_INT (1 << 30) +#define TRANS_OK_INT (1 << 29) +#define TRANS_FAIL_INT (1 << 28) +#define XD_INT (1 << 27) +#define MS_INT (1 << 26) +#define SD_INT (1 << 25) +#define GPIO0_INT (1 << 24) +#define OC_INT (1 << 23) +#define SD_WRITE_PROTECT (1 << 19) +#define XD_EXIST (1 << 18) +#define MS_EXIST (1 << 17) +#define SD_EXIST (1 << 16) +#define DELINK_INT GPIO0_INT +#define MS_OC_INT (1 << 23) +#define SD_OC_INT (1 << 22) #define CARD_INT (XD_INT | MS_INT | SD_INT) #define NEED_COMPLETE_INT (DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT) #define RTSX_INT (CMD_DONE_INT | NEED_COMPLETE_INT | \ CARD_INT | GPIO0_INT | OC_INT) - #define CARD_EXIST (XD_EXIST | MS_EXIST | SD_EXIST) -/* Bus interrupt enable register */ -#define CMD_DONE_INT_EN (1 << 31) -#define DATA_DONE_INT_EN (1 << 30) -#define TRANS_OK_INT_EN (1 << 29) -#define TRANS_FAIL_INT_EN (1 << 28) -#define XD_INT_EN (1 << 27) -#define MS_INT_EN (1 << 26) -#define SD_INT_EN (1 << 25) -#define GPIO0_INT_EN (1 << 24) -#define OC_INT_EN (1 << 23) -#define DELINK_INT_EN GPIO0_INT_EN -#define MS_OC_INT_EN (1 << 23) -#define SD_OC_INT_EN (1 << 22) - -#define READ_REG_CMD 0 -#define WRITE_REG_CMD 1 -#define CHECK_REG_CMD 2 +#define RTSX_BIER 0x18 +#define CMD_DONE_INT_EN (1 << 31) +#define DATA_DONE_INT_EN (1 << 30) +#define TRANS_OK_INT_EN (1 << 29) +#define TRANS_FAIL_INT_EN (1 << 28) +#define XD_INT_EN (1 << 27) +#define MS_INT_EN (1 << 26) +#define SD_INT_EN (1 << 25) +#define GPIO0_INT_EN (1 << 24) +#define OC_INT_EN (1 << 23) +#define DELINK_INT_EN GPIO0_INT_EN +#define MS_OC_INT_EN (1 << 23) +#define SD_OC_INT_EN (1 << 22) + /* * macros for easy use @@ -125,423 +123,68 @@ #define rtsx_pci_write_config_dword(pcr, where, val) \ pci_write_config_dword((pcr)->pci, where, val) -#define STATE_TRANS_NONE 0 -#define STATE_TRANS_CMD 1 -#define STATE_TRANS_BUF 2 -#define STATE_TRANS_SG 3 - -#define TRANS_NOT_READY 0 -#define TRANS_RESULT_OK 1 -#define TRANS_RESULT_FAIL 2 -#define TRANS_NO_DEVICE 3 - -#define RTSX_RESV_BUF_LEN 4096 -#define HOST_CMDS_BUF_LEN 1024 -#define HOST_SG_TBL_BUF_LEN (RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN) -#define HOST_SG_TBL_ITEMS (HOST_SG_TBL_BUF_LEN / 8) -#define MAX_SG_ITEM_LEN 0x80000 - -#define HOST_TO_DEVICE 0 -#define DEVICE_TO_HOST 1 - -#define RTSX_PHASE_MAX 32 -#define RX_TUNING_CNT 3 - -/* SG descriptor */ -#define SG_INT 0x04 -#define SG_END 0x02 -#define SG_VALID 0x01 - -#define SG_NO_OP 0x00 -#define SG_TRANS_DATA (0x02 << 4) -#define SG_LINK_DESC (0x03 << 4) - -/* Output voltage */ -#define OUTPUT_3V3 0 -#define OUTPUT_1V8 1 - -/* Card Clock Enable Register */ -#define SD_CLK_EN 0x04 -#define MS_CLK_EN 0x08 - -/* Card Select Register */ -#define SD_MOD_SEL 2 -#define MS_MOD_SEL 3 - -/* Card Output Enable Register */ -#define SD_OUTPUT_EN 0x04 -#define MS_OUTPUT_EN 0x08 - -/* CARD_SHARE_MODE */ -#define CARD_SHARE_MASK 0x0F -#define CARD_SHARE_MULTI_LUN 0x00 -#define CARD_SHARE_NORMAL 0x00 -#define CARD_SHARE_48_SD 0x04 -#define CARD_SHARE_48_MS 0x08 -/* CARD_SHARE_MODE for barossa */ -#define CARD_SHARE_BAROSSA_SD 0x01 -#define CARD_SHARE_BAROSSA_MS 0x02 - -/* CARD_DRIVE_SEL */ -#define MS_DRIVE_8mA (0x01 << 6) -#define MMC_DRIVE_8mA (0x01 << 4) -#define XD_DRIVE_8mA (0x01 << 2) -#define GPIO_DRIVE_8mA 0x01 -#define RTS5209_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ - XD_DRIVE_8mA | GPIO_DRIVE_8mA) -#define RTL8411_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ - XD_DRIVE_8mA) -#define RTSX_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | GPIO_DRIVE_8mA) +#define STATE_TRANS_NONE 0 +#define STATE_TRANS_CMD 1 +#define STATE_TRANS_BUF 2 +#define STATE_TRANS_SG 3 -/* SD30_DRIVE_SEL */ -#define DRIVER_TYPE_A 0x05 -#define DRIVER_TYPE_B 0x03 -#define DRIVER_TYPE_C 0x02 -#define DRIVER_TYPE_D 0x01 -#define CFG_DRIVER_TYPE_A 0x02 -#define CFG_DRIVER_TYPE_B 0x03 -#define CFG_DRIVER_TYPE_C 0x01 -#define CFG_DRIVER_TYPE_D 0x00 - -/* FPDCTL */ -#define SSC_POWER_DOWN 0x01 -#define SD_OC_POWER_DOWN 0x02 -#define ALL_POWER_DOWN 0x07 -#define OC_POWER_DOWN 0x06 - -/* CLK_CTL */ -#define CHANGE_CLK 0x01 - -/* LDO_CTL */ -#define BPP_ASIC_1V7 0x00 -#define BPP_ASIC_1V8 0x01 -#define BPP_ASIC_1V9 0x02 -#define BPP_ASIC_2V0 0x03 -#define BPP_ASIC_2V7 0x04 -#define BPP_ASIC_2V8 0x05 -#define BPP_ASIC_3V2 0x06 -#define BPP_ASIC_3V3 0x07 -#define BPP_REG_TUNED18 0x07 -#define BPP_TUNED18_SHIFT_8402 5 -#define BPP_TUNED18_SHIFT_8411 4 -#define BPP_PAD_MASK 0x04 -#define BPP_PAD_3V3 0x04 -#define BPP_PAD_1V8 0x00 -#define BPP_LDO_POWB 0x03 -#define BPP_LDO_ON 0x00 -#define BPP_LDO_SUSPEND 0x02 -#define BPP_LDO_OFF 0x03 - -/* CD_PAD_CTL */ -#define CD_DISABLE_MASK 0x07 -#define MS_CD_DISABLE 0x04 -#define SD_CD_DISABLE 0x02 -#define XD_CD_DISABLE 0x01 -#define CD_DISABLE 0x07 -#define CD_ENABLE 0x00 -#define MS_CD_EN_ONLY 0x03 -#define SD_CD_EN_ONLY 0x05 -#define XD_CD_EN_ONLY 0x06 -#define FORCE_CD_LOW_MASK 0x38 -#define FORCE_CD_XD_LOW 0x08 -#define FORCE_CD_SD_LOW 0x10 -#define FORCE_CD_MS_LOW 0x20 -#define CD_AUTO_DISABLE 0x40 - -/* SD_STAT1 */ -#define SD_CRC7_ERR 0x80 -#define SD_CRC16_ERR 0x40 -#define SD_CRC_WRITE_ERR 0x20 -#define SD_CRC_WRITE_ERR_MASK 0x1C -#define GET_CRC_TIME_OUT 0x02 -#define SD_TUNING_COMPARE_ERR 0x01 - -/* SD_STAT2 */ -#define SD_RSP_80CLK_TIMEOUT 0x01 - -/* SD_BUS_STAT */ -#define SD_CLK_TOGGLE_EN 0x80 -#define SD_CLK_FORCE_STOP 0x40 -#define SD_DAT3_STATUS 0x10 -#define SD_DAT2_STATUS 0x08 -#define SD_DAT1_STATUS 0x04 -#define SD_DAT0_STATUS 0x02 -#define SD_CMD_STATUS 0x01 - -/* SD_PAD_CTL */ -#define SD_IO_USING_1V8 0x80 -#define SD_IO_USING_3V3 0x7F -#define TYPE_A_DRIVING 0x00 -#define TYPE_B_DRIVING 0x01 -#define TYPE_C_DRIVING 0x02 -#define TYPE_D_DRIVING 0x03 - -/* SD_SAMPLE_POINT_CTL */ -#define DDR_FIX_RX_DAT 0x00 -#define DDR_VAR_RX_DAT 0x80 -#define DDR_FIX_RX_DAT_EDGE 0x00 -#define DDR_FIX_RX_DAT_14_DELAY 0x40 -#define DDR_FIX_RX_CMD 0x00 -#define DDR_VAR_RX_CMD 0x20 -#define DDR_FIX_RX_CMD_POS_EDGE 0x00 -#define DDR_FIX_RX_CMD_14_DELAY 0x10 -#define SD20_RX_POS_EDGE 0x00 -#define SD20_RX_14_DELAY 0x08 -#define SD20_RX_SEL_MASK 0x08 +#define TRANS_NOT_READY 0 +#define TRANS_RESULT_OK 1 +#define TRANS_RESULT_FAIL 2 +#define TRANS_NO_DEVICE 3 -/* SD_PUSH_POINT_CTL */ -#define DDR_FIX_TX_CMD_DAT 0x00 -#define DDR_VAR_TX_CMD_DAT 0x80 -#define DDR_FIX_TX_DAT_14_TSU 0x00 -#define DDR_FIX_TX_DAT_12_TSU 0x40 -#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 -#define DDR_FIX_TX_CMD_14_AHEAD 0x20 -#define SD20_TX_NEG_EDGE 0x00 -#define SD20_TX_14_AHEAD 0x10 -#define SD20_TX_SEL_MASK 0x10 -#define DDR_VAR_SDCLK_POL_SWAP 0x01 - -/* SD_TRANSFER */ -#define SD_TRANSFER_START 0x80 -#define SD_TRANSFER_END 0x40 -#define SD_STAT_IDLE 0x20 -#define SD_TRANSFER_ERR 0x10 -/* SD Transfer Mode definition */ -#define SD_TM_NORMAL_WRITE 0x00 -#define SD_TM_AUTO_WRITE_3 0x01 -#define SD_TM_AUTO_WRITE_4 0x02 -#define SD_TM_AUTO_READ_3 0x05 -#define SD_TM_AUTO_READ_4 0x06 -#define SD_TM_CMD_RSP 0x08 -#define SD_TM_AUTO_WRITE_1 0x09 -#define SD_TM_AUTO_WRITE_2 0x0A -#define SD_TM_NORMAL_READ 0x0C -#define SD_TM_AUTO_READ_1 0x0D -#define SD_TM_AUTO_READ_2 0x0E -#define SD_TM_AUTO_TUNING 0x0F - -/* SD_VPTX_CTL / SD_VPRX_CTL */ -#define PHASE_CHANGE 0x80 -#define PHASE_NOT_RESET 0x40 - -/* SD_DCMPS_TX_CTL / SD_DCMPS_RX_CTL */ -#define DCMPS_CHANGE 0x80 -#define DCMPS_CHANGE_DONE 0x40 -#define DCMPS_ERROR 0x20 -#define DCMPS_CURRENT_PHASE 0x1F - -/* SD Configure 1 Register */ -#define SD_CLK_DIVIDE_0 0x00 -#define SD_CLK_DIVIDE_256 0xC0 -#define SD_CLK_DIVIDE_128 0x80 -#define SD_BUS_WIDTH_1BIT 0x00 -#define SD_BUS_WIDTH_4BIT 0x01 -#define SD_BUS_WIDTH_8BIT 0x02 -#define SD_ASYNC_FIFO_NOT_RST 0x10 -#define SD_20_MODE 0x00 -#define SD_DDR_MODE 0x04 -#define SD_30_MODE 0x08 - -#define SD_CLK_DIVIDE_MASK 0xC0 - -/* SD_CMD_STATE */ -#define SD_CMD_IDLE 0x80 - -/* SD_DATA_STATE */ -#define SD_DATA_IDLE 0x80 - -/* DCM_DRP_CTL */ -#define DCM_RESET 0x08 -#define DCM_LOCKED 0x04 -#define DCM_208M 0x00 -#define DCM_TX 0x01 -#define DCM_RX 0x02 - -/* DCM_DRP_TRIG */ -#define DRP_START 0x80 -#define DRP_DONE 0x40 - -/* DCM_DRP_CFG */ -#define DRP_WRITE 0x80 -#define DRP_READ 0x00 -#define DCM_WRITE_ADDRESS_50 0x50 -#define DCM_WRITE_ADDRESS_51 0x51 -#define DCM_READ_ADDRESS_00 0x00 -#define DCM_READ_ADDRESS_51 0x51 - -/* IRQSTAT0 */ -#define DMA_DONE_INT 0x80 -#define SUSPEND_INT 0x40 -#define LINK_RDY_INT 0x20 -#define LINK_DOWN_INT 0x10 - -/* DMACTL */ -#define DMA_RST 0x80 -#define DMA_BUSY 0x04 -#define DMA_DIR_TO_CARD 0x00 -#define DMA_DIR_FROM_CARD 0x02 -#define DMA_EN 0x01 -#define DMA_128 (0 << 4) -#define DMA_256 (1 << 4) -#define DMA_512 (2 << 4) -#define DMA_1024 (3 << 4) -#define DMA_PACK_SIZE_MASK 0x30 - -/* SSC_CTL1 */ -#define SSC_RSTB 0x80 -#define SSC_8X_EN 0x40 -#define SSC_FIX_FRAC 0x20 -#define SSC_SEL_1M 0x00 -#define SSC_SEL_2M 0x08 -#define SSC_SEL_4M 0x10 -#define SSC_SEL_8M 0x18 - -/* SSC_CTL2 */ -#define SSC_DEPTH_MASK 0x07 -#define SSC_DEPTH_DISALBE 0x00 -#define SSC_DEPTH_4M 0x01 -#define SSC_DEPTH_2M 0x02 -#define SSC_DEPTH_1M 0x03 -#define SSC_DEPTH_500K 0x04 -#define SSC_DEPTH_250K 0x05 - -/* System Clock Control Register */ -#define CLK_LOW_FREQ 0x01 - -/* System Clock Divider Register */ -#define CLK_DIV_1 0x01 -#define CLK_DIV_2 0x02 -#define CLK_DIV_4 0x03 -#define CLK_DIV_8 0x04 - -/* MS_CFG */ -#define SAMPLE_TIME_RISING 0x00 -#define SAMPLE_TIME_FALLING 0x80 -#define PUSH_TIME_DEFAULT 0x00 -#define PUSH_TIME_ODD 0x40 -#define NO_EXTEND_TOGGLE 0x00 -#define EXTEND_TOGGLE_CHK 0x20 -#define MS_BUS_WIDTH_1 0x00 -#define MS_BUS_WIDTH_4 0x10 -#define MS_BUS_WIDTH_8 0x18 -#define MS_2K_SECTOR_MODE 0x04 -#define MS_512_SECTOR_MODE 0x00 -#define MS_TOGGLE_TIMEOUT_EN 0x00 -#define MS_TOGGLE_TIMEOUT_DISEN 0x01 -#define MS_NO_CHECK_INT 0x02 +#define RTSX_RESV_BUF_LEN 4096 +#define HOST_CMDS_BUF_LEN 1024 +#define HOST_SG_TBL_BUF_LEN (RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN) +#define HOST_SG_TBL_ITEMS (HOST_SG_TBL_BUF_LEN / 8) +#define MAX_SG_ITEM_LEN 0x80000 +#define HOST_TO_DEVICE 0 +#define DEVICE_TO_HOST 1 + +#define OUTPUT_3V3 0 +#define OUTPUT_1V8 1 -/* MS_TRANS_CFG */ -#define WAIT_INT 0x80 -#define NO_WAIT_INT 0x00 -#define NO_AUTO_READ_INT_REG 0x00 -#define AUTO_READ_INT_REG 0x40 -#define MS_CRC16_ERR 0x20 -#define MS_RDY_TIMEOUT 0x10 -#define MS_INT_CMDNK 0x08 -#define MS_INT_BREQ 0x04 -#define MS_INT_ERR 0x02 -#define MS_INT_CED 0x01 - -/* MS_TRANSFER */ -#define MS_TRANSFER_START 0x80 -#define MS_TRANSFER_END 0x40 -#define MS_TRANSFER_ERR 0x20 -#define MS_BS_STATE 0x10 -#define MS_TM_READ_BYTES 0x00 -#define MS_TM_NORMAL_READ 0x01 -#define MS_TM_WRITE_BYTES 0x04 -#define MS_TM_NORMAL_WRITE 0x05 -#define MS_TM_AUTO_READ 0x08 -#define MS_TM_AUTO_WRITE 0x0C - -/* SD Configure 2 Register */ -#define SD_CALCULATE_CRC7 0x00 -#define SD_NO_CALCULATE_CRC7 0x80 -#define SD_CHECK_CRC16 0x00 -#define SD_NO_CHECK_CRC16 0x40 -#define SD_NO_CHECK_WAIT_CRC_TO 0x20 -#define SD_WAIT_BUSY_END 0x08 -#define SD_NO_WAIT_BUSY_END 0x00 -#define SD_CHECK_CRC7 0x00 -#define SD_NO_CHECK_CRC7 0x04 -#define SD_RSP_LEN_0 0x00 -#define SD_RSP_LEN_6 0x01 -#define SD_RSP_LEN_17 0x02 -/* SD/MMC Response Type Definition */ -#define SD_RSP_TYPE_R0 0x04 -#define SD_RSP_TYPE_R1 0x01 -#define SD_RSP_TYPE_R1b 0x09 -#define SD_RSP_TYPE_R2 0x02 -#define SD_RSP_TYPE_R3 0x05 -#define SD_RSP_TYPE_R4 0x05 -#define SD_RSP_TYPE_R5 0x01 -#define SD_RSP_TYPE_R6 0x01 -#define SD_RSP_TYPE_R7 0x01 - -/* SD_CONFIGURE3 */ -#define SD_RSP_80CLK_TIMEOUT_EN 0x01 - -/* Card Transfer Reset Register */ -#define SPI_STOP 0x01 -#define XD_STOP 0x02 -#define SD_STOP 0x04 -#define MS_STOP 0x08 -#define SPI_CLR_ERR 0x10 -#define XD_CLR_ERR 0x20 -#define SD_CLR_ERR 0x40 -#define MS_CLR_ERR 0x80 - -/* Card Data Source Register */ -#define PINGPONG_BUFFER 0x01 -#define RING_BUFFER 0x00 - -/* Card Power Control Register */ -#define PMOS_STRG_MASK 0x10 -#define PMOS_STRG_800mA 0x10 -#define PMOS_STRG_400mA 0x00 -#define SD_POWER_OFF 0x03 -#define SD_PARTIAL_POWER_ON 0x01 -#define SD_POWER_ON 0x00 -#define SD_POWER_MASK 0x03 -#define MS_POWER_OFF 0x0C -#define MS_PARTIAL_POWER_ON 0x04 -#define MS_POWER_ON 0x00 -#define MS_POWER_MASK 0x0C -#define BPP_POWER_OFF 0x0F -#define BPP_POWER_5_PERCENT_ON 0x0E -#define BPP_POWER_10_PERCENT_ON 0x0C -#define BPP_POWER_15_PERCENT_ON 0x08 -#define BPP_POWER_ON 0x00 -#define BPP_POWER_MASK 0x0F -#define SD_VCC_PARTIAL_POWER_ON 0x02 -#define SD_VCC_POWER_ON 0x00 - -/* PWR_GATE_CTRL */ -#define PWR_GATE_EN 0x01 -#define LDO3318_PWR_MASK 0x06 -#define LDO_ON 0x00 -#define LDO_SUSPEND 0x04 -#define LDO_OFF 0x06 - -/* CARD_CLK_SOURCE */ -#define CRC_FIX_CLK (0x00 << 0) -#define CRC_VAR_CLK0 (0x01 << 0) -#define CRC_VAR_CLK1 (0x02 << 0) -#define SD30_FIX_CLK (0x00 << 2) -#define SD30_VAR_CLK0 (0x01 << 2) -#define SD30_VAR_CLK1 (0x02 << 2) -#define SAMPLE_FIX_CLK (0x00 << 4) -#define SAMPLE_VAR_CLK0 (0x01 << 4) -#define SAMPLE_VAR_CLK1 (0x02 << 4) - -/* HOST_SLEEP_STATE */ -#define HOST_ENTER_S1 1 -#define HOST_ENTER_S3 2 +#define RTSX_PHASE_MAX 32 +#define RX_TUNING_CNT 3 #define MS_CFG 0xFD40 +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 #define MS_TPC 0xFD41 #define MS_TRANS_CFG 0xFD42 +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 #define MS_TRANSFER 0xFD43 +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C #define MS_INT_REG 0xFD44 #define MS_BYTE_CNT 0xFD45 #define MS_SECTOR_CNT_L 0xFD46 @@ -549,14 +192,90 @@ #define MS_DBUS_H 0xFD48 #define SD_CFG1 0xFDA0 +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_NOT_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 +#define SD_CLK_DIVIDE_MASK 0xC0 #define SD_CFG2 0xFDA1 +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_NO_CHECK_WAIT_CRC_TO 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 #define SD_CFG3 0xFDA2 +#define SD_RSP_80CLK_TIMEOUT_EN 0x01 + #define SD_STAT1 0xFDA3 +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 #define SD_STAT2 0xFDA4 +#define SD_RSP_80CLK_TIMEOUT 0x01 + #define SD_BUS_STAT 0xFDA5 +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 #define SD_PAD_CTL 0xFDA6 +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 #define SD_SAMPLE_POINT_CTL 0xFDA7 +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 #define SD_PUSH_POINT_CTL 0xFDA8 +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 #define SD_CMD0 0xFDA9 #define SD_CMD_START 0x40 #define SD_CMD1 0xFDAA @@ -569,14 +288,46 @@ #define SD_BLOCK_CNT_L 0xFDB1 #define SD_BLOCK_CNT_H 0xFDB2 #define SD_TRANSFER 0xFDB3 +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 +#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F #define SD_CMD_STATE 0xFDB5 +#define SD_CMD_IDLE 0x80 + #define SD_DATA_STATE 0xFDB6 +#define SD_DATA_IDLE 0x80 #define SRCTL 0xFC13 #define DCM_DRP_CTL 0xFC23 +#define DCM_RESET 0x08 +#define DCM_LOCKED 0x04 +#define DCM_208M 0x00 +#define DCM_TX 0x01 +#define DCM_RX 0x02 #define DCM_DRP_TRIG 0xFC24 +#define DRP_START 0x80 +#define DRP_DONE 0x40 #define DCM_DRP_CFG 0xFC25 +#define DRP_WRITE 0x80 +#define DRP_READ 0x00 +#define DCM_WRITE_ADDRESS_50 0x50 +#define DCM_WRITE_ADDRESS_51 0x51 +#define DCM_READ_ADDRESS_00 0x00 +#define DCM_READ_ADDRESS_51 0x51 #define DCM_DRP_WR_DATA_L 0xFC26 #define DCM_DRP_WR_DATA_H 0xFC27 #define DCM_DRP_RD_DATA_L 0xFC28 @@ -587,42 +338,153 @@ #define SD_DCMPS1_CTL 0xFC2D #define SD_VPTX_CTL SD_VPCLK0_CTL #define SD_VPRX_CTL SD_VPCLK1_CTL +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 #define SD_DCMPS_TX_CTL SD_DCMPS0_CTL #define SD_DCMPS_RX_CTL SD_DCMPS1_CTL +#define DCMPS_CHANGE 0x80 +#define DCMPS_CHANGE_DONE 0x40 +#define DCMPS_ERROR 0x20 +#define DCMPS_CURRENT_PHASE 0x1F #define CARD_CLK_SOURCE 0xFC2E - +#define CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) #define CARD_PWR_CTL 0xFD50 +#define PMOS_STRG_MASK 0x10 +#define PMOS_STRG_800mA 0x10 +#define PMOS_STRG_400mA 0x00 +#define SD_POWER_OFF 0x03 +#define SD_PARTIAL_POWER_ON 0x01 +#define SD_POWER_ON 0x00 +#define SD_POWER_MASK 0x03 +#define MS_POWER_OFF 0x0C +#define MS_PARTIAL_POWER_ON 0x04 +#define MS_POWER_ON 0x00 +#define MS_POWER_MASK 0x0C +#define BPP_POWER_OFF 0x0F +#define BPP_POWER_5_PERCENT_ON 0x0E +#define BPP_POWER_10_PERCENT_ON 0x0C +#define BPP_POWER_15_PERCENT_ON 0x08 +#define BPP_POWER_ON 0x00 +#define BPP_POWER_MASK 0x0F +#define SD_VCC_PARTIAL_POWER_ON 0x02 +#define SD_VCC_POWER_ON 0x00 #define CARD_CLK_SWITCH 0xFD51 #define RTL8411B_PACKAGE_MODE 0xFD51 #define CARD_SHARE_MODE 0xFD52 +#define CARD_SHARE_MASK 0x0F +#define CARD_SHARE_MULTI_LUN 0x00 +#define CARD_SHARE_NORMAL 0x00 +#define CARD_SHARE_48_SD 0x04 +#define CARD_SHARE_48_MS 0x08 +#define CARD_SHARE_BAROSSA_SD 0x01 +#define CARD_SHARE_BAROSSA_MS 0x02 #define CARD_DRIVE_SEL 0xFD53 +#define MS_DRIVE_8mA (0x01 << 6) +#define MMC_DRIVE_8mA (0x01 << 4) +#define XD_DRIVE_8mA (0x01 << 2) +#define GPIO_DRIVE_8mA 0x01 +#define RTS5209_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ + XD_DRIVE_8mA | GPIO_DRIVE_8mA) +#define RTL8411_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ + XD_DRIVE_8mA) +#define RTSX_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | GPIO_DRIVE_8mA) + #define CARD_STOP 0xFD54 +#define SPI_STOP 0x01 +#define XD_STOP 0x02 +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SPI_CLR_ERR 0x10 +#define XD_CLR_ERR 0x20 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 #define CARD_OE 0xFD55 +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 #define CARD_AUTO_BLINK 0xFD56 #define CARD_GPIO_DIR 0xFD57 #define CARD_GPIO 0xFD58 #define CARD_DATA_SOURCE 0xFD5B +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 #define SD30_CLK_DRIVE_SEL 0xFD5A +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 #define CARD_SELECT 0xFD5C +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 #define SD30_DRIVE_SEL 0xFD5E +#define CFG_DRIVER_TYPE_A 0x02 +#define CFG_DRIVER_TYPE_B 0x03 +#define CFG_DRIVER_TYPE_C 0x01 +#define CFG_DRIVER_TYPE_D 0x00 #define SD30_CMD_DRIVE_SEL 0xFD5E #define SD30_DAT_DRIVE_SEL 0xFD5F #define CARD_CLK_EN 0xFD69 +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 #define SDIO_CTRL 0xFD6B #define CD_PAD_CTL 0xFD73 - +#define CD_DISABLE_MASK 0x07 +#define MS_CD_DISABLE 0x04 +#define SD_CD_DISABLE 0x02 +#define XD_CD_DISABLE 0x01 +#define CD_DISABLE 0x07 +#define CD_ENABLE 0x00 +#define MS_CD_EN_ONLY 0x03 +#define SD_CD_EN_ONLY 0x05 +#define XD_CD_EN_ONLY 0x06 +#define FORCE_CD_LOW_MASK 0x38 +#define FORCE_CD_XD_LOW 0x08 +#define FORCE_CD_SD_LOW 0x10 +#define FORCE_CD_MS_LOW 0x20 +#define CD_AUTO_DISABLE 0x40 #define FPDCTL 0xFC00 +#define SSC_POWER_DOWN 0x01 +#define SD_OC_POWER_DOWN 0x02 +#define ALL_POWER_DOWN 0x07 +#define OC_POWER_DOWN 0x06 #define PDINFO 0xFC01 #define CLK_CTL 0xFC02 +#define CHANGE_CLK 0x01 +#define CLK_LOW_FREQ 0x01 + #define CLK_DIV 0xFC03 +#define CLK_DIV_1 0x01 +#define CLK_DIV_2 0x02 +#define CLK_DIV_4 0x03 +#define CLK_DIV_8 0x04 #define CLK_SEL 0xFC04 #define SSC_DIV_N_0 0xFC0F #define SSC_DIV_N_1 0xFC10 #define SSC_CTL1 0xFC11 +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 #define SSC_CTL2 0xFC12 - +#define SSC_DEPTH_MASK 0x07 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_4M 0x01 +#define SSC_DEPTH_2M 0x02 +#define SSC_DEPTH_1M 0x03 +#define SSC_DEPTH_500K 0x04 +#define SSC_DEPTH_250K 0x05 #define RCCTL 0xFC14 #define FPGA_PULL_CTL 0xFC1D @@ -630,6 +492,24 @@ #define GPIO_CTL 0xFC1F #define LDO_CTL 0xFC1E +#define BPP_ASIC_1V7 0x00 +#define BPP_ASIC_1V8 0x01 +#define BPP_ASIC_1V9 0x02 +#define BPP_ASIC_2V0 0x03 +#define BPP_ASIC_2V7 0x04 +#define BPP_ASIC_2V8 0x05 +#define BPP_ASIC_3V2 0x06 +#define BPP_ASIC_3V3 0x07 +#define BPP_REG_TUNED18 0x07 +#define BPP_TUNED18_SHIFT_8402 5 +#define BPP_TUNED18_SHIFT_8411 4 +#define BPP_PAD_MASK 0x04 +#define BPP_PAD_3V3 0x04 +#define BPP_PAD_1V8 0x00 +#define BPP_LDO_POWB 0x03 +#define BPP_LDO_ON 0x00 +#define BPP_LDO_SUSPEND 0x02 +#define BPP_LDO_OFF 0x03 #define SYS_VER 0xFC32 #define CARD_PULL_CTL1 0xFD60 @@ -642,6 +522,10 @@ /* PCI Express Related Registers */ #define IRQEN0 0xFE20 #define IRQSTAT0 0xFE21 +#define DMA_DONE_INT 0x80 +#define SUSPEND_INT 0x40 +#define LINK_RDY_INT 0x20 +#define LINK_DOWN_INT 0x10 #define IRQEN1 0xFE22 #define IRQSTAT1 0xFE23 #define TLPRIEN 0xFE24 @@ -653,6 +537,16 @@ #define DMATC2 0xFE2A #define DMATC3 0xFE2B #define DMACTL 0xFE2C +#define DMA_RST 0x80 +#define DMA_BUSY 0x04 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 4) +#define DMA_256 (1 << 4) +#define DMA_512 (2 << 4) +#define DMA_1024 (3 << 4) +#define DMA_PACK_SIZE_MASK 0x30 #define BCTL 0xFE2D #define RBBC0 0xFE2E #define RBBC1 0xFE2F @@ -693,11 +587,19 @@ #define RESET_LOAD_REG 0xFE5E #define EFUSE_CONTENT 0xFE5F #define HOST_SLEEP_STATE 0xFE60 +#define HOST_ENTER_S1 1 +#define HOST_ENTER_S3 2 + #define SDIO_CFG 0xFE70 #define NFTS_TX_CTRL 0xFE72 #define PWR_GATE_CTRL 0xFE75 +#define PWR_GATE_EN 0x01 +#define LDO3318_PWR_MASK 0x06 +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x04 +#define LDO_OFF 0x06 #define PWD_SUSPEND_EN 0xFE76 #define LDO_PWR_SEL 0xFE78 -- cgit v1.2.3 From 9e33ce79f828eb5a1bb9dd4830c7fa719d4279dc Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:10 +0800 Subject: mfd: rtsx: Update PETXCFG address PETXCFG is defined at 0xFF03, the old 0xFE49 not used any more. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index e81f2bbfcda0..87cff60953f6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -572,7 +572,6 @@ #define MSGTXDATA2 0xFE46 #define MSGTXDATA3 0xFE47 #define MSGTXCTL 0xFE48 -#define PETXCFG 0xFE49 #define LTR_CTL 0xFE4A #define OBFF_CFG 0xFE4C @@ -606,6 +605,7 @@ #define DUMMY_REG_RESET_0 0xFE90 #define AUTOLOAD_CFG_BASE 0xFF00 +#define PETXCFG 0xFF03 #define PM_CTRL1 0xFF44 #define PM_CTRL2 0xFF45 -- cgit v1.2.3 From b038538104d5b033d3beaffba6d6efe01246930b Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:12 +0800 Subject: mfd: rtsx: Update phy register Update some phy register name and value for rts5249, the updated value makes chip more stable on some platform. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 109 ++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 87cff60953f6..0103210ec3e1 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -630,16 +630,47 @@ /* Phy register */ #define PHY_PCR 0x00 +#define PHY_PCR_FORCE_CODE 0xB000 +#define PHY_PCR_OOBS_CALI_50 0x0800 +#define PHY_PCR_OOBS_VCM_08 0x0200 +#define PHY_PCR_OOBS_SEN_90 0x0040 +#define PHY_PCR_RSSI_EN 0x0002 +#define PHY_PCR_RX10K 0x0001 + #define PHY_RCR0 0x01 #define PHY_RCR1 0x02 +#define PHY_RCR1_ADP_TIME_4 0x0400 +#define PHY_RCR1_VCO_COARSE 0x001F + #define PHY_RCR2 0x03 +#define PHY_RCR2_EMPHASE_EN 0x8000 +#define PHY_RCR2_NADJR 0x4000 +#define PHY_RCR2_CDR_SR_2 0x0100 +#define PHY_RCR2_FREQSEL_12 0x0040 +#define PHY_RCR2_CDR_SC_12P 0x0010 +#define PHY_RCR2_CALIB_LATE 0x0002 + #define PHY_RTCR 0x04 #define PHY_RDR 0x05 +#define PHY_RDR_RXDSEL_1_9 0x4000 +#define PHY_SSC_AUTO_PWD 0x0600 #define PHY_TCR0 0x06 #define PHY_TCR1 0x07 #define PHY_TUNE 0x08 +#define PHY_TUNE_TUNEREF_1_0 0x4000 +#define PHY_TUNE_VBGSEL_1252 0x0C00 +#define PHY_TUNE_SDBUS_33 0x0200 +#define PHY_TUNE_TUNED18 0x01C0 +#define PHY_TUNE_TUNED12 0X0020 +#define PHY_TUNE_TUNEA12 0x0004 + #define PHY_IMR 0x09 #define PHY_BPCR 0x0A +#define PHY_BPCR_IBRXSEL 0x0400 +#define PHY_BPCR_IBTXSEL 0x0100 +#define PHY_BPCR_IB_FILTER 0x0080 +#define PHY_BPCR_CMIRROR_EN 0x0040 + #define PHY_BIST 0x0B #define PHY_RAW_L 0x0C #define PHY_RAW_H 0x0D @@ -654,12 +685,35 @@ #define PHY_BPNR 0x16 #define PHY_BRNR2 0x17 #define PHY_BENR 0x18 -#define PHY_REG_REV 0x19 +#define PHY_REV 0x19 +#define PHY_REV_RESV 0xE000 +#define PHY_REV_RXIDLE_LATCHED 0x1000 +#define PHY_REV_P1_EN 0x0800 +#define PHY_REV_RXIDLE_EN 0x0400 +#define PHY_REV_CLKREQ_TX_EN 0x0200 +#define PHY_REV_CLKREQ_RX_EN 0x0100 +#define PHY_REV_CLKREQ_DT_1_0 0x0040 +#define PHY_REV_STOP_CLKRD 0x0020 +#define PHY_REV_RX_PWST 0x0008 +#define PHY_REV_STOP_CLKWR 0x0004 + #define PHY_FLD0 0x1A #define PHY_FLD1 0x1B #define PHY_FLD2 0x1C #define PHY_FLD3 0x1D +#define PHY_FLD3_TIMER_4 0x0800 +#define PHY_FLD3_TIMER_6 0x0020 +#define PHY_FLD3_RXDELINK 0x0004 + #define PHY_FLD4 0x1E +#define PHY_FLD4_FLDEN_SEL 0x4000 +#define PHY_FLD4_REQ_REF 0x2000 +#define PHY_FLD4_RXAMP_OFF 0x1000 +#define PHY_FLD4_REQ_ADDA 0x0800 +#define PHY_FLD4_BER_COUNT 0x00E0 +#define PHY_FLD4_BER_TIMER 0x000A +#define PHY_FLD4_BER_CHK_EN 0x0001 + #define PHY_DUM_REG 0x1F #define LCTLR 0x80 @@ -675,59 +729,6 @@ #define PCR_SETTING_REG2 0x814 #define PCR_SETTING_REG3 0x747 -/* Phy bits */ -#define PHY_PCR_FORCE_CODE 0xB000 -#define PHY_PCR_OOBS_CALI_50 0x0800 -#define PHY_PCR_OOBS_VCM_08 0x0200 -#define PHY_PCR_OOBS_SEN_90 0x0040 -#define PHY_PCR_RSSI_EN 0x0002 - -#define PHY_RCR1_ADP_TIME 0x0100 -#define PHY_RCR1_VCO_COARSE 0x001F - -#define PHY_RCR2_EMPHASE_EN 0x8000 -#define PHY_RCR2_NADJR 0x4000 -#define PHY_RCR2_CDR_CP_10 0x0400 -#define PHY_RCR2_CDR_SR_2 0x0100 -#define PHY_RCR2_FREQSEL_12 0x0040 -#define PHY_RCR2_CPADJEN 0x0020 -#define PHY_RCR2_CDR_SC_8 0x0008 -#define PHY_RCR2_CALIB_LATE 0x0002 - -#define PHY_RDR_RXDSEL_1_9 0x4000 - -#define PHY_TUNE_TUNEREF_1_0 0x4000 -#define PHY_TUNE_VBGSEL_1252 0x0C00 -#define PHY_TUNE_SDBUS_33 0x0200 -#define PHY_TUNE_TUNED18 0x01C0 -#define PHY_TUNE_TUNED12 0X0020 - -#define PHY_BPCR_IBRXSEL 0x0400 -#define PHY_BPCR_IBTXSEL 0x0100 -#define PHY_BPCR_IB_FILTER 0x0080 -#define PHY_BPCR_CMIRROR_EN 0x0040 - -#define PHY_REG_REV_RESV 0xE000 -#define PHY_REG_REV_RXIDLE_LATCHED 0x1000 -#define PHY_REG_REV_P1_EN 0x0800 -#define PHY_REG_REV_RXIDLE_EN 0x0400 -#define PHY_REG_REV_CLKREQ_DLY_TIMER_1_0 0x0040 -#define PHY_REG_REV_STOP_CLKRD 0x0020 -#define PHY_REG_REV_RX_PWST 0x0008 -#define PHY_REG_REV_STOP_CLKWR 0x0004 - -#define PHY_FLD3_TIMER_4 0x7800 -#define PHY_FLD3_TIMER_6 0x00E0 -#define PHY_FLD3_RXDELINK 0x0004 - -#define PHY_FLD4_FLDEN_SEL 0x4000 -#define PHY_FLD4_REQ_REF 0x2000 -#define PHY_FLD4_RXAMP_OFF 0x1000 -#define PHY_FLD4_REQ_ADDA 0x0800 -#define PHY_FLD4_BER_COUNT 0x00E0 -#define PHY_FLD4_BER_TIMER 0x000A -#define PHY_FLD4_BER_CHK_EN 0x0001 - #define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) struct rtsx_pcr; -- cgit v1.2.3 From 19f3bd548f2750a8a7e4e6d2f25fdc5f8e2c3ee9 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:13 +0800 Subject: mfd: rtsx: Remove LCTLR defination To enable/disable ASPM we should find LINK CONTROL register in PCI config space. All old chip use 0x80 address, but new chip may use another address, so we using pci_find_capability() to get LINK CONTROL address. rtsx_gops.c was removed, we consider to put some common operations to this file, but the actual thing is, only a group of chips are in common ops1, and another group of chips in common ops2, it is hard to decide put which ops into generic ops file. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 0103210ec3e1..33cc63ced99e 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -716,15 +716,6 @@ #define PHY_DUM_REG 0x1F -#define LCTLR 0x80 -#define LCTLR_EXT_SYNC 0x80 -#define LCTLR_COMMON_CLOCK_CFG 0x40 -#define LCTLR_RETRAIN_LINK 0x20 -#define LCTLR_LINK_DISABLE 0x10 -#define LCTLR_RCB 0x08 -#define LCTLR_RESERVED 0x04 -#define LCTLR_ASPM_CTL_MASK 0x03 - #define PCR_SETTING_REG1 0x724 #define PCR_SETTING_REG2 0x814 #define PCR_SETTING_REG3 0x747 @@ -759,6 +750,7 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; struct rtsx_pcr { struct pci_dev *pci; unsigned int id; + int pcie_cap; /* pci resources */ unsigned long addr; -- cgit v1.2.3 From 663c425f2c8d87a433629f09c5afd0af7e7e550c Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:14 +0800 Subject: mfd: rtsx: Add support for rts524A add support for new chip rts524A. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 132 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 33cc63ced99e..754a18d4203a 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -577,8 +577,16 @@ #define CDRESUMECTL 0xFE52 #define WAKE_SEL_CTL 0xFE54 +#define PCLK_CTL 0xFE55 +#define PCLK_MODE_SEL 0x20 #define PME_FORCE_CTL 0xFE56 + #define ASPM_FORCE_CTL 0xFE57 +#define FORCE_ASPM_CTL0 0x10 +#define FORCE_ASPM_VAL_MASK 0x03 +#define FORCE_ASPM_L1_EN 0x02 +#define FORCE_ASPM_L0_EN 0x01 +#define FORCE_ASPM_NO_ASPM 0x00 #define PM_CLK_FORCE_CTL 0xFE58 #define FUNC_FORCE_CTL 0xFE59 #define PERST_GLITCH_WIDTH 0xFE5C @@ -590,7 +598,8 @@ #define HOST_ENTER_S3 2 #define SDIO_CFG 0xFE70 - +#define PM_EVENT_DEBUG 0xFE71 +#define PME_DEBUG_0 0x08 #define NFTS_TX_CTRL 0xFE72 #define PWR_GATE_CTRL 0xFE75 @@ -602,12 +611,19 @@ #define PWD_SUSPEND_EN 0xFE76 #define LDO_PWR_SEL 0xFE78 +#define L1SUB_CONFIG1 0xFE8D +#define L1SUB_CONFIG2 0xFE8E +#define L1SUB_AUTO_CFG 0x02 +#define L1SUB_CONFIG3 0xFE8F + #define DUMMY_REG_RESET_0 0xFE90 #define AUTOLOAD_CFG_BASE 0xFF00 #define PETXCFG 0xFF03 #define PM_CTRL1 0xFF44 +#define CD_RESUME_EN_MASK 0xF0 + #define PM_CTRL2 0xFF45 #define PM_CTRL3 0xFF46 #define SDIO_SEND_PME_EN 0x80 @@ -628,6 +644,61 @@ #define IMAGE_FLAG_ADDR0 0xCE80 #define IMAGE_FLAG_ADDR1 0xCE81 +#define RREF_CFG 0xFF6C +#define RREF_VBGSEL_MASK 0x38 +#define RREF_VBGSEL_1V25 0x28 + +#define OOBS_CONFIG 0xFF6E +#define OOBS_AUTOK_DIS 0x80 +#define OOBS_VAL_MASK 0x1F + +#define LDO_DV18_CFG 0xFF70 +#define LDO_DV18_SR_MASK 0xC0 +#define LDO_DV18_SR_DF 0x40 + +#define LDO_CONFIG2 0xFF71 +#define LDO_D3318_MASK 0x07 +#define LDO_D3318_33V 0x07 +#define LDO_D3318_18V 0x02 + +#define LDO_VCC_CFG0 0xFF72 +#define LDO_VCC_LMTVTH_MASK 0x30 +#define LDO_VCC_LMTVTH_2A 0x10 + +#define LDO_VCC_CFG1 0xFF73 +#define LDO_VCC_REF_TUNE_MASK 0x30 +#define LDO_VCC_REF_1V2 0x20 +#define LDO_VCC_TUNE_MASK 0x07 +#define LDO_VCC_1V8 0x04 +#define LDO_VCC_3V3 0x07 +#define LDO_VCC_LMT_EN 0x08 + +#define LDO_VIO_CFG 0xFF75 +#define LDO_VIO_SR_MASK 0xC0 +#define LDO_VIO_SR_DF 0x40 +#define LDO_VIO_REF_TUNE_MASK 0x30 +#define LDO_VIO_REF_1V2 0x20 +#define LDO_VIO_TUNE_MASK 0x07 +#define LDO_VIO_1V7 0x03 +#define LDO_VIO_1V8 0x04 +#define LDO_VIO_3V3 0x07 + +#define LDO_DV12S_CFG 0xFF76 +#define LDO_REF12_TUNE_MASK 0x18 +#define LDO_REF12_TUNE_DF 0x10 +#define LDO_D12_TUNE_MASK 0x07 +#define LDO_D12_TUNE_DF 0x04 + +#define LDO_AV12S_CFG 0xFF77 +#define LDO_AV12S_TUNE_MASK 0x07 +#define LDO_AV12S_TUNE_DF 0x04 + +#define SD40_LDO_CTL1 0xFE7D +#define SD40_VIO_TUNE_MASK 0x70 +#define SD40_VIO_TUNE_1V7 0x30 +#define SD_VIO_LDO_1V8 0x40 +#define SD_VIO_LDO_3V3 0x70 + /* Phy register */ #define PHY_PCR 0x00 #define PHY_PCR_FORCE_CODE 0xB000 @@ -641,6 +712,10 @@ #define PHY_RCR1 0x02 #define PHY_RCR1_ADP_TIME_4 0x0400 #define PHY_RCR1_VCO_COARSE 0x001F +#define PHY_SSCCR2 0x02 +#define PHY_SSCCR2_PLL_NCODE 0x0A00 +#define PHY_SSCCR2_TIME0 0x001C +#define PHY_SSCCR2_TIME2_WIDTH 0x0003 #define PHY_RCR2 0x03 #define PHY_RCR2_EMPHASE_EN 0x8000 @@ -649,6 +724,9 @@ #define PHY_RCR2_FREQSEL_12 0x0040 #define PHY_RCR2_CDR_SC_12P 0x0010 #define PHY_RCR2_CALIB_LATE 0x0002 +#define PHY_SSCCR3 0x03 +#define PHY_SSCCR3_STEP_IN 0x2740 +#define PHY_SSCCR3_CHECK_DELAY 0x0008 #define PHY_RTCR 0x04 #define PHY_RDR 0x05 @@ -663,6 +741,16 @@ #define PHY_TUNE_TUNED18 0x01C0 #define PHY_TUNE_TUNED12 0X0020 #define PHY_TUNE_TUNEA12 0x0004 +#define PHY_TUNE_VOLTAGE_MASK 0xFC3F +#define PHY_TUNE_VOLTAGE_3V3 0x03C0 +#define PHY_TUNE_D18_1V8 0x0100 +#define PHY_TUNE_D18_1V7 0x0080 +#define PHY_ANA08 0x08 +#define PHY_ANA08_RX_EQ_DCGAIN 0x5000 +#define PHY_ANA08_SEL_RX_EN 0x0400 +#define PHY_ANA08_RX_EQ_VAL 0x03C0 +#define PHY_ANA08_SCP 0x0020 +#define PHY_ANA08_SEL_IPI 0x0004 #define PHY_IMR 0x09 #define PHY_BPCR 0x0A @@ -678,6 +766,7 @@ #define PHY_HOST_CLK_CTRL 0x0F #define PHY_DMR 0x10 #define PHY_BACR 0x11 +#define PHY_BACR_BASIC_MASK 0xFFF3 #define PHY_IER 0x12 #define PHY_BCSR 0x13 #define PHY_BPR 0x14 @@ -698,12 +787,19 @@ #define PHY_REV_STOP_CLKWR 0x0004 #define PHY_FLD0 0x1A +#define PHY_ANA1A 0x1A +#define PHY_ANA1A_TXR_LOOPBACK 0x2000 +#define PHY_ANA1A_RXT_BIST 0x0500 +#define PHY_ANA1A_TXR_BIST 0x0040 +#define PHY_ANA1A_REV 0x0006 #define PHY_FLD1 0x1B #define PHY_FLD2 0x1C #define PHY_FLD3 0x1D #define PHY_FLD3_TIMER_4 0x0800 #define PHY_FLD3_TIMER_6 0x0020 #define PHY_FLD3_RXDELINK 0x0004 +#define PHY_ANA1D 0x1D +#define PHY_ANA1D_DEBUG_ADDR 0x0004 #define PHY_FLD4 0x1E #define PHY_FLD4_FLDEN_SEL 0x4000 @@ -713,7 +809,18 @@ #define PHY_FLD4_BER_COUNT 0x00E0 #define PHY_FLD4_BER_TIMER 0x000A #define PHY_FLD4_BER_CHK_EN 0x0001 - +#define PHY_DIG1E 0x1E +#define PHY_DIG1E_REV 0x4000 +#define PHY_DIG1E_D0_X_D1 0x1000 +#define PHY_DIG1E_RX_ON_HOST 0x0800 +#define PHY_DIG1E_RCLK_REF_HOST 0x0400 +#define PHY_DIG1E_RCLK_TX_EN_KEEP 0x0040 +#define PHY_DIG1E_RCLK_TX_TERM_KEEP 0x0020 +#define PHY_DIG1E_RCLK_RX_EIDLE_ON 0x0010 +#define PHY_DIG1E_TX_TERM_KEEP 0x0008 +#define PHY_DIG1E_RX_TERM_KEEP 0x0004 +#define PHY_DIG1E_TX_EN_KEEP 0x0002 +#define PHY_DIG1E_RX_EN_KEEP 0x0001 #define PHY_DUM_REG 0x1F #define PCR_SETTING_REG1 0x724 @@ -729,6 +836,8 @@ struct pcr_handle { }; struct pcr_ops { + int (*write_phy)(struct rtsx_pcr *pcr, u8 addr, u16 val); + int (*read_phy)(struct rtsx_pcr *pcr, u8 addr, u16 *val); int (*extra_init_hw)(struct rtsx_pcr *pcr); int (*optimize_phy)(struct rtsx_pcr *pcr); int (*turn_on_led)(struct rtsx_pcr *pcr); @@ -823,6 +932,8 @@ struct rtsx_pcr { const struct pcr_ops *ops; enum PDEV_STAT state; + u16 reg_pm_ctrl3; + int num_slots; struct rtsx_slot *slots; }; @@ -830,6 +941,10 @@ struct rtsx_pcr { #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) #define PCI_PID(pcr) ((pcr)->pci->device) +#define is_version(pcr, pid, ver) \ + (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version == (ver)) +#define pcr_dbg(pcr, fmt, arg...) \ + dev_dbg(&(pcr)->pci->dev, fmt, ##arg) #define SDR104_PHASE(val) ((val) & 0xFF) #define SDR50_PHASE(val) (((val) >> 8) & 0xFF) @@ -899,4 +1014,17 @@ static inline void rtsx_pci_write_be32(struct rtsx_pcr *pcr, u16 reg, u32 val) rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 3, 0xFF, val); } +static inline int rtsx_pci_update_phy(struct rtsx_pcr *pcr, u8 addr, + u16 mask, u16 append) +{ + int err; + u16 val; + + err = rtsx_pci_read_phy_register(pcr, addr, &val); + if (err < 0) + return err; + + return rtsx_pci_write_phy_register(pcr, addr, (val & mask) | append); +} + #endif -- cgit v1.2.3 From 41bc2334737a32d3062a318dde5964590d0e24c9 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Wed, 25 Feb 2015 13:50:15 +0800 Subject: mfd: rtsx: Add support for rts525A Add support for new chip rts525A. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 754a18d4203a..ff843e7ca23d 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -727,6 +727,10 @@ #define PHY_SSCCR3 0x03 #define PHY_SSCCR3_STEP_IN 0x2740 #define PHY_SSCCR3_CHECK_DELAY 0x0008 +#define _PHY_ANA03 0x03 +#define _PHY_ANA03_TIMER_MAX 0x2700 +#define _PHY_ANA03_OOBS_DEB_EN 0x0040 +#define _PHY_CMU_DEBUG_EN 0x0008 #define PHY_RTCR 0x04 #define PHY_RDR 0x05 @@ -785,6 +789,10 @@ #define PHY_REV_STOP_CLKRD 0x0020 #define PHY_REV_RX_PWST 0x0008 #define PHY_REV_STOP_CLKWR 0x0004 +#define _PHY_REV0 0x19 +#define _PHY_REV0_FILTER_OUT 0x3800 +#define _PHY_REV0_CDR_BYPASS_PFD 0x0100 +#define _PHY_REV0_CDR_RX_IDLE_BYPASS 0x0002 #define PHY_FLD0 0x1A #define PHY_ANA1A 0x1A @@ -800,6 +808,13 @@ #define PHY_FLD3_RXDELINK 0x0004 #define PHY_ANA1D 0x1D #define PHY_ANA1D_DEBUG_ADDR 0x0004 +#define _PHY_FLD0 0x1D +#define _PHY_FLD0_CLK_REQ_20C 0x8000 +#define _PHY_FLD0_RX_IDLE_EN 0x1000 +#define _PHY_FLD0_BIT_ERR_RSTN 0x0800 +#define _PHY_FLD0_BER_COUNT 0x01E0 +#define _PHY_FLD0_BER_TIMER 0x001E +#define _PHY_FLD0_CHECK_EN 0x0001 #define PHY_FLD4 0x1E #define PHY_FLD4_FLDEN_SEL 0x4000 -- cgit v1.2.3 From 49010336290f4e3e4249c43bff1a1ff065c8f94e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 25 Feb 2015 15:37:13 +0000 Subject: mfd: arizona: Move useful defines into a dt-binding include Move parts of linux/mfd/arizona/pdata.h and gpio.h into a new file in the dt-binding directory for use by device tree bindings. This also makes gpio.h redundant so remove it in the process. Signed-off-by: Charles Keepax Acked-by: Mark Brown Acked-by: Rob Herring Signed-off-by: Lee Jones --- include/linux/mfd/arizona/gpio.h | 96 --------------------------------------- include/linux/mfd/arizona/pdata.h | 22 +-------- 2 files changed, 2 insertions(+), 116 deletions(-) delete mode 100644 include/linux/mfd/arizona/gpio.h (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/gpio.h b/include/linux/mfd/arizona/gpio.h deleted file mode 100644 index d2146bb74f89..000000000000 --- a/include/linux/mfd/arizona/gpio.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * GPIO configuration for Arizona devices - * - * Copyright 2013 Wolfson Microelectronics. PLC. - * - * Author: Charles Keepax - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ARIZONA_GPIO_H -#define _ARIZONA_GPIO_H - -#define ARIZONA_GP_FN_TXLRCLK 0x00 -#define ARIZONA_GP_FN_GPIO 0x01 -#define ARIZONA_GP_FN_IRQ1 0x02 -#define ARIZONA_GP_FN_IRQ2 0x03 -#define ARIZONA_GP_FN_OPCLK 0x04 -#define ARIZONA_GP_FN_FLL1_OUT 0x05 -#define ARIZONA_GP_FN_FLL2_OUT 0x06 -#define ARIZONA_GP_FN_PWM1 0x08 -#define ARIZONA_GP_FN_PWM2 0x09 -#define ARIZONA_GP_FN_SYSCLK_UNDERCLOCKED 0x0A -#define ARIZONA_GP_FN_ASYNCCLK_UNDERCLOCKED 0x0B -#define ARIZONA_GP_FN_FLL1_LOCK 0x0C -#define ARIZONA_GP_FN_FLL2_LOCK 0x0D -#define ARIZONA_GP_FN_FLL1_CLOCK_OK 0x0F -#define ARIZONA_GP_FN_FLL2_CLOCK_OK 0x10 -#define ARIZONA_GP_FN_HEADPHONE_DET 0x12 -#define ARIZONA_GP_FN_MIC_DET 0x13 -#define ARIZONA_GP_FN_WSEQ_STATUS 0x15 -#define ARIZONA_GP_FN_CIF_ADDRESS_ERROR 0x16 -#define ARIZONA_GP_FN_ASRC1_LOCK 0x1A -#define ARIZONA_GP_FN_ASRC2_LOCK 0x1B -#define ARIZONA_GP_FN_ASRC_CONFIG_ERROR 0x1C -#define ARIZONA_GP_FN_DRC1_SIGNAL_DETECT 0x1D -#define ARIZONA_GP_FN_DRC1_ANTICLIP 0x1E -#define ARIZONA_GP_FN_DRC1_DECAY 0x1F -#define ARIZONA_GP_FN_DRC1_NOISE 0x20 -#define ARIZONA_GP_FN_DRC1_QUICK_RELEASE 0x21 -#define ARIZONA_GP_FN_DRC2_SIGNAL_DETECT 0x22 -#define ARIZONA_GP_FN_DRC2_ANTICLIP 0x23 -#define ARIZONA_GP_FN_DRC2_DECAY 0x24 -#define ARIZONA_GP_FN_DRC2_NOISE 0x25 -#define ARIZONA_GP_FN_DRC2_QUICK_RELEASE 0x26 -#define ARIZONA_GP_FN_MIXER_DROPPED_SAMPLE 0x27 -#define ARIZONA_GP_FN_AIF1_CONFIG_ERROR 0x28 -#define ARIZONA_GP_FN_AIF2_CONFIG_ERROR 0x29 -#define ARIZONA_GP_FN_AIF3_CONFIG_ERROR 0x2A -#define ARIZONA_GP_FN_SPK_TEMP_SHUTDOWN 0x2B -#define ARIZONA_GP_FN_SPK_TEMP_WARNING 0x2C -#define ARIZONA_GP_FN_UNDERCLOCKED 0x2D -#define ARIZONA_GP_FN_OVERCLOCKED 0x2E -#define ARIZONA_GP_FN_DSP_IRQ1 0x35 -#define ARIZONA_GP_FN_DSP_IRQ2 0x36 -#define ARIZONA_GP_FN_ASYNC_OPCLK 0x3D -#define ARIZONA_GP_FN_BOOT_DONE 0x44 -#define ARIZONA_GP_FN_DSP1_RAM_READY 0x45 -#define ARIZONA_GP_FN_SYSCLK_ENA_STATUS 0x4B -#define ARIZONA_GP_FN_ASYNCCLK_ENA_STATUS 0x4C - -#define ARIZONA_GPN_DIR 0x8000 /* GPN_DIR */ -#define ARIZONA_GPN_DIR_MASK 0x8000 /* GPN_DIR */ -#define ARIZONA_GPN_DIR_SHIFT 15 /* GPN_DIR */ -#define ARIZONA_GPN_DIR_WIDTH 1 /* GPN_DIR */ -#define ARIZONA_GPN_PU 0x4000 /* GPN_PU */ -#define ARIZONA_GPN_PU_MASK 0x4000 /* GPN_PU */ -#define ARIZONA_GPN_PU_SHIFT 14 /* GPN_PU */ -#define ARIZONA_GPN_PU_WIDTH 1 /* GPN_PU */ -#define ARIZONA_GPN_PD 0x2000 /* GPN_PD */ -#define ARIZONA_GPN_PD_MASK 0x2000 /* GPN_PD */ -#define ARIZONA_GPN_PD_SHIFT 13 /* GPN_PD */ -#define ARIZONA_GPN_PD_WIDTH 1 /* GPN_PD */ -#define ARIZONA_GPN_LVL 0x0800 /* GPN_LVL */ -#define ARIZONA_GPN_LVL_MASK 0x0800 /* GPN_LVL */ -#define ARIZONA_GPN_LVL_SHIFT 11 /* GPN_LVL */ -#define ARIZONA_GPN_LVL_WIDTH 1 /* GPN_LVL */ -#define ARIZONA_GPN_POL 0x0400 /* GPN_POL */ -#define ARIZONA_GPN_POL_MASK 0x0400 /* GPN_POL */ -#define ARIZONA_GPN_POL_SHIFT 10 /* GPN_POL */ -#define ARIZONA_GPN_POL_WIDTH 1 /* GPN_POL */ -#define ARIZONA_GPN_OP_CFG 0x0200 /* GPN_OP_CFG */ -#define ARIZONA_GPN_OP_CFG_MASK 0x0200 /* GPN_OP_CFG */ -#define ARIZONA_GPN_OP_CFG_SHIFT 9 /* GPN_OP_CFG */ -#define ARIZONA_GPN_OP_CFG_WIDTH 1 /* GPN_OP_CFG */ -#define ARIZONA_GPN_DB 0x0100 /* GPN_DB */ -#define ARIZONA_GPN_DB_MASK 0x0100 /* GPN_DB */ -#define ARIZONA_GPN_DB_SHIFT 8 /* GPN_DB */ -#define ARIZONA_GPN_DB_WIDTH 1 /* GPN_DB */ -#define ARIZONA_GPN_FN_MASK 0x007F /* GPN_DB */ -#define ARIZONA_GPN_FN_SHIFT 0 /* GPN_DB */ -#define ARIZONA_GPN_FN_WIDTH 7 /* GPN_DB */ - -#endif diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 4578c72c9b86..48fe31356605 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -11,31 +11,26 @@ #ifndef _ARIZONA_PDATA_H #define _ARIZONA_PDATA_H -#define ARIZONA_GPN_DIR 0x8000 /* GPN_DIR */ +#include + #define ARIZONA_GPN_DIR_MASK 0x8000 /* GPN_DIR */ #define ARIZONA_GPN_DIR_SHIFT 15 /* GPN_DIR */ #define ARIZONA_GPN_DIR_WIDTH 1 /* GPN_DIR */ -#define ARIZONA_GPN_PU 0x4000 /* GPN_PU */ #define ARIZONA_GPN_PU_MASK 0x4000 /* GPN_PU */ #define ARIZONA_GPN_PU_SHIFT 14 /* GPN_PU */ #define ARIZONA_GPN_PU_WIDTH 1 /* GPN_PU */ -#define ARIZONA_GPN_PD 0x2000 /* GPN_PD */ #define ARIZONA_GPN_PD_MASK 0x2000 /* GPN_PD */ #define ARIZONA_GPN_PD_SHIFT 13 /* GPN_PD */ #define ARIZONA_GPN_PD_WIDTH 1 /* GPN_PD */ -#define ARIZONA_GPN_LVL 0x0800 /* GPN_LVL */ #define ARIZONA_GPN_LVL_MASK 0x0800 /* GPN_LVL */ #define ARIZONA_GPN_LVL_SHIFT 11 /* GPN_LVL */ #define ARIZONA_GPN_LVL_WIDTH 1 /* GPN_LVL */ -#define ARIZONA_GPN_POL 0x0400 /* GPN_POL */ #define ARIZONA_GPN_POL_MASK 0x0400 /* GPN_POL */ #define ARIZONA_GPN_POL_SHIFT 10 /* GPN_POL */ #define ARIZONA_GPN_POL_WIDTH 1 /* GPN_POL */ -#define ARIZONA_GPN_OP_CFG 0x0200 /* GPN_OP_CFG */ #define ARIZONA_GPN_OP_CFG_MASK 0x0200 /* GPN_OP_CFG */ #define ARIZONA_GPN_OP_CFG_SHIFT 9 /* GPN_OP_CFG */ #define ARIZONA_GPN_OP_CFG_WIDTH 1 /* GPN_OP_CFG */ -#define ARIZONA_GPN_DB 0x0100 /* GPN_DB */ #define ARIZONA_GPN_DB_MASK 0x0100 /* GPN_DB */ #define ARIZONA_GPN_DB_SHIFT 8 /* GPN_DB */ #define ARIZONA_GPN_DB_WIDTH 1 /* GPN_DB */ @@ -45,23 +40,10 @@ #define ARIZONA_MAX_GPIO 5 -#define ARIZONA_32KZ_MCLK1 1 -#define ARIZONA_32KZ_MCLK2 2 -#define ARIZONA_32KZ_NONE 3 - #define ARIZONA_MAX_INPUT 4 -#define ARIZONA_DMIC_MICVDD 0 -#define ARIZONA_DMIC_MICBIAS1 1 -#define ARIZONA_DMIC_MICBIAS2 2 -#define ARIZONA_DMIC_MICBIAS3 3 - #define ARIZONA_MAX_MICBIAS 3 -#define ARIZONA_INMODE_DIFF 0 -#define ARIZONA_INMODE_SE 1 -#define ARIZONA_INMODE_DMIC 2 - #define ARIZONA_MAX_OUTPUT 6 #define ARIZONA_MAX_AIF 3 -- cgit v1.2.3 From ef9c80b37ad3062d335d4e37846a94d862b5579f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 26 Feb 2015 10:13:50 +0100 Subject: mfd: sec: Cleanup unused RTC fields: ono, WTSR and SMPL The WTSR (Watchdog Timer Software Reset) and SMPL (Sudden Momentary Power Loss) were removed from rtc-s5m driver because they were not used. Remove them (and on/off interrupt) from main MFD driver and header. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 3fdb7cfbffb3..75115384f3fc 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -58,13 +58,7 @@ enum sec_device_type { * @irq_base: Base IRQ number for device, required for IRQs * @irq: Generic IRQ number for device * @irq_data: Runtime data structure for IRQ controller - * @ono: Power onoff IRQ number for s5m87xx * @wakeup: Whether or not this is a wakeup device - * @wtsr_smpl: Whether or not to enable in RTC driver the Watchdog - * Timer Software Reset (registers set to default value - * after PWRHOLD falling) and Sudden Momentary Power Loss - * (PMIC will enter power on sequence after short drop in - * VBATT voltage). */ struct sec_pmic_dev { struct device *dev; @@ -77,9 +71,7 @@ struct sec_pmic_dev { int irq; struct regmap_irq_chip_data *irq_data; - int ono; bool wakeup; - bool wtsr_smpl; }; int sec_irq_init(struct sec_pmic_dev *sec_pmic); @@ -95,7 +87,6 @@ struct sec_platform_data { int irq_base; int (*cfg_pmic_irq)(void); - int ono; bool wakeup; bool buck_voltage_lock; -- cgit v1.2.3 From 4d0b0bd4385f0ce8d3b430f9667c5e2ca1de10af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:47:25 +0800 Subject: f2fs: simplfy a field name in struct f2fs_extent,extent_info Rename a filed name from 'blk_addr' to 'blk' in struct {f2fs_extent,extent_info} as annotation of this field descripts its meaning well to us. By this way, we can avoid long statement in code of following patches. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a23556c32703..502f28cfb78e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -153,7 +153,7 @@ struct f2fs_orphan_block { */ struct f2fs_extent { __le32 fofs; /* start file offset of the extent */ - __le32 blk_addr; /* start block address of the extent */ + __le32 blk; /* start block address of the extent */ __le32 len; /* lengh of the extent */ } __packed; -- cgit v1.2.3 From d24209bb689e2c7f7418faec9b4a948e922d24da Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Jan 2015 15:26:03 -0800 Subject: rcu: Improve diagnostics for blocked critical sections in irq If an RCU read-side critical section occurs within an interrupt handler or a softirq handler, it cannot have been preempted. Therefore, there is a check in rcu_read_unlock_special() checking for this error. However, when this check triggers, it lacks diagnostic information. This commit therefore moves rcu_read_unlock()'s lockdep annotation to follow the call to __rcu_read_unlock() and changes rcu_read_unlock_special()'s WARN_ON_ONCE() to an lockdep_rcu_suspicious() in order to locate where the offending RCU read-side critical section began. In addition, the value of the ->rcu_read_unlock_special field is printed. Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 7 ++++++- include/linux/rcupdate.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 74ab23176e9b..066ba4157541 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -531,8 +531,13 @@ do { \ # define might_lock_read(lock) do { } while (0) #endif -#ifdef CONFIG_PROVE_RCU +#ifdef CONFIG_LOCKDEP void lockdep_rcu_suspicious(const char *file, const int line, const char *s); +#else +static inline void +lockdep_rcu_suspicious(const char *file, const int line, const char *s) +{ +} #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3e6afed51051..70b896e16f19 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -942,9 +942,9 @@ static inline void rcu_read_unlock(void) { rcu_lockdep_assert(rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); - rcu_lock_release(&rcu_lock_map); __release(RCU); __rcu_read_unlock(); + rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** -- cgit v1.2.3 From b826565aaf8809df146666c03d1acbb7febbd13e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Feb 2015 11:46:33 -0800 Subject: rcu: Reverse rcu_dereference_check() conditions The rcu_dereference_check() family of primitives evaluates the RCU lockdep expression first, and only then evaluates the expression passed in. This works fine normally, but can potentially fail in environments (such as NMI handlers) where lockdep cannot be invoked. The problem is that even if the expression passed in is "1", the compiler would need to prove that the RCU lockdep expression (rcu_read_lock_held(), for example) is free of side effects in order to be able to elide it. Given that rcu_read_lock_held() is sometimes separately compiled, the compiler cannot always use this optimization. This commit therefore reverse the order of evaluation, so that the expression passed in is evaluated first, and the RCU lockdep expression is evaluated only if the passed-in expression evaluated to false, courtesy of the C-language short-circuit boolean evaluation rules. This compells the compiler to forego executing the RCU lockdep expression in cases where the passed-in expression evaluates to "1" at compile time, so that (for example) rcu_dereference_raw() can be guaranteed to execute safely within an NMI handler. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) --- include/linux/rcupdate.h | 6 +++--- include/linux/srcu.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 70b896e16f19..416ae2848c6c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -729,7 +729,7 @@ static inline void rcu_preempt_sleep_check(void) * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -739,7 +739,7 @@ static inline void rcu_preempt_sleep_check(void) * This is the RCU-bh counterpart to rcu_dereference_check(). */ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) + __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -749,7 +749,7 @@ static inline void rcu_preempt_sleep_check(void) * This is the RCU-sched counterpart to rcu_dereference_check(). */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ __rcu) #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9cfd9623fb03..bdeb4567b71e 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -182,7 +182,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * lockdep_is_held() calls. */ #define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing -- cgit v1.2.3 From ed748621031c2a205749997421e59fb4dfb1e909 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Tue, 3 Mar 2015 12:52:08 -0500 Subject: of: iommu: Add ptr to OF node arg to of_iommu_configure() of_iommu_configure() is called from of_dma_configure() to setup iommu ops using DT property. This API is currently used for platform devices for which DMA configuration (including IOMMU ops) may come from the device's parent. To extend this functionality for PCI devices, this API needs to take a parent node ptr as an argument instead of assuming device's parent. This is needed since for PCI, the DMA configuration may be defined in the DT node of the root bus bridge's parent device. Currently only dma-range is used for PCI and IOMMU is not supported. Return error if the device is PCI. Add "parent" parameter (a struct device_node *) to of_iommu_configure(). Tested-by: Suravee Suthikulpanit (AMD Seattle) Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas Reviewed-by: Catalin Marinas Acked-by: Rob Herring Acked-by: Will Deacon CC: Joerg Roedel CC: Grant Likely CC: Russell King CC: Arnd Bergmann --- include/linux/of_iommu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 16c75547d725..ffbe4707d4aa 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,7 +12,8 @@ extern int of_get_dma_window(struct device_node *dn, const char *prefix, size_t *size); extern void of_iommu_init(void); -extern struct iommu_ops *of_iommu_configure(struct device *dev); +extern struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np); #else @@ -24,7 +25,8 @@ static inline int of_get_dma_window(struct device_node *dn, const char *prefix, } static inline void of_iommu_init(void) { } -static inline struct iommu_ops *of_iommu_configure(struct device *dev) +static inline struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np) { return NULL; } -- cgit v1.2.3 From 1f5c69aa51f9c7c8d2a5c2e4dc339f6c7d5c15cd Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Tue, 3 Mar 2015 12:52:09 -0500 Subject: of: Move of_dma_configure() to device.c to help re-use Move of_dma_configure() to device.c so it can be re-used for PCI devices to obtain DMA configuration from DT. Also add a second argument so that for PCI, the DT node of root bus host bridge can be used to obtain the DMA configuration for the slave PCI device. Tested-by: Suravee Suthikulpanit (AMD Seattle) Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas Reviewed-by: Catalin Marinas Acked-by: Will Deacon Acked-by: Rob Herring CC: Joerg Roedel CC: Grant Likely CC: Russell King CC: Arnd Bergmann --- include/linux/of_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index ef370210ffb2..22801b10cef5 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -53,6 +53,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } +void of_dma_configure(struct device *dev, struct device_node *np); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -90,6 +91,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } +static inline void of_dma_configure(struct device *dev, struct device_node *np) +{} #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From 6675a601d72be408025e675599702e30a99188aa Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Tue, 3 Mar 2015 12:52:11 -0500 Subject: PCI: Add helper functions pci_get[put]_host_bridge_device() Add helper functions to get/put the root bus's host bridge device. Tested-by: Suravee Suthikulpanit (AMD Seattle) Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas Reviewed-by: Catalin Marinas Acked-by: Will Deacon CC: Joerg Roedel CC: Grant Likely CC: Rob Herring CC: Russell King CC: Arnd Bergmann --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 211e9da8a7d7..a379513bddef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -510,6 +510,9 @@ static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev) return dev->bus->self; } +struct device *pci_get_host_bridge_device(struct pci_dev *dev); +void pci_put_host_bridge_device(struct device *dev); + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { -- cgit v1.2.3 From c49b8fc26e115a37eca6f7bcef1847eb80f2a4fd Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Tue, 3 Mar 2015 12:52:12 -0500 Subject: of/pci: Add of_pci_dma_configure() to update DMA configuration Add of_pci_dma_configure() to allow updating the DMA configuration of the PCI device using the configuration from DT of the parent of the root bridge device. Use the newly added APIs pci_get/put_host_bridge_device() for implementing this. [bhelgaas: fold in fix for host bridges with no parent OF device] Tested-by: Suravee Suthikulpanit (AMD Seattle) Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas Reviewed-by: Catalin Marinas Acked-by: Rob Herring Acked-by: Will Deacon CC: Joerg Roedel CC: Grant Likely CC: Russell King CC: Arnd Bergmann --- include/linux/of_pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index ce0e5abeb454..29fd3fe1c035 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,6 +16,7 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); +void of_pci_dma_configure(struct pci_dev *pci_dev); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -50,6 +51,8 @@ of_get_pci_domain_nr(struct device_node *node) { return -1; } + +static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } #endif #if defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From 112bdfaa525fd5993e17885861342893f15290b0 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 16 Feb 2015 15:41:02 +0000 Subject: extcon: arizona: Deobfuscate arizona_extcon_do_magic arizona_extcon_do_magic does not lend a lot of clarity to the purpose of the function, and as all the registers used are described in the datasheet there is no need to obfuscate the code. This patch renames the function to arizona_extcon_hp_clamp, as it controls clamping on the headphone output. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/arizona/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 910e3aa1e965..4863548faff7 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -126,7 +126,7 @@ struct arizona { struct regmap_irq_chip_data *aod_irq_chip; struct regmap_irq_chip_data *irq_chip; - bool hpdet_magic; + bool hpdet_clamp; unsigned int hp_ena; struct mutex clk_lock; -- cgit v1.2.3 From 0189197f441602acdca3f97750d392a895b778fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:10:47 -0600 Subject: mpls: Basic routing support This change adds a new Kconfig option MPLS_ROUTING. The core of this change is the code to look at an mpls packet received from another machine. Look that packet up in a routing table and forward the packet on. Support of MPLS over ATM is not considered or attempted here. This implemntation follows RFC3032 and implements the MPLS shim header that can pass over essentially any network. What RFC3021 refers to as the as the Incoming Label Map (ILM) I call net->mpls.platform_label[]. What RFC3031 refers to as the Next Label Hop Forwarding Entry (NHLFE) I call mpls_route. Though calling it the label fordwarding information base (lfib) might also be valid. Further the implemntation forwards packets as described in RFC3032. There is no need and given the original motivation for MPLS a strong discincentive to have a flexible label forwarding path. In essence the logic is the topmost label is read, looked up, removed, and replaced by 0 or more new lables and the sent out the specified interface to it's next hop. Quite a few optional features are not implemented here. Among them are generation of ICMP errors when the TTL is exceeded or the packet is larger than the next hop MTU (those conditions are detected and the packets are dropped instead of generating an icmp error). The traffic class field is always set to 0. The implementation focuses on IP over MPLS and does not handle egress of other kinds of protocols. Instead of implementing coordination with the neighbour table and sorting out how to input next hops in a different address family (for which there is value). I was lazy and implemented a next hop mac address instead. The code is simpler and there are flavor of MPLS such as MPLS-TP where neither an IPv4 nor an IPv6 next hop is appropriate so a next hop by mac address would need to be implemented at some point. Two new definitions AF_MPLS and PF_MPLS are exposed to userspace. Decoding the mpls header must be done by first byeswapping a 32bit bit endian word into the local cpu endian and then bit shifting to extract the pieces. There is no C bit-field that can represent a wire format mpls header on a little endian machine as the low bits of the 20bit label wind up in the wrong half of third byte. Therefore internally everything is deal with in cpu native byte order except when writing to and reading from a packet. For management simplicity if a label is configured to forward out an interface that is down the packet is dropped early. Similarly if an network interface is removed rt_dev is updated to NULL (so no reference is preserved) and any packets for that label are dropped. Keeping the label entries in the kernel allows the kernel label table to function as the definitive source of which labels are allocated and which are not. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5c19cba34dce..fab4d0ddf4ed 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -181,6 +181,7 @@ struct ucred { #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ #define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -226,6 +227,7 @@ struct ucred { #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC #define PF_IB AF_IB +#define PF_MPLS AF_MPLS #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH -- cgit v1.2.3 From 6df8dd5c185b212d3d7364402df58bff0e67ace4 Mon Sep 17 00:00:00 2001 From: Flora Fu Date: Sun, 22 Feb 2015 13:15:29 +0100 Subject: mfd: Add support for the MediaTek MT6397 PMIC This adds support for the MediaTek MT6397 PMIC. This is a multifunction device with the following sub modules: - Regulator - RTC - Audio codec - GPIO - Clock It is interfaced to the host controller using SPI interface by a proprietary hardware called PMIC wrapper or pwrap. MT6397 MFD is a child device of the pwrap. Signed-off-by: Flora Fu, MediaTek Signed-off-by: Sascha Hauer Signed-off-by: Lee Jones --- include/linux/mfd/mt6397/core.h | 64 +++++++ include/linux/mfd/mt6397/registers.h | 362 +++++++++++++++++++++++++++++++++++ 2 files changed, 426 insertions(+) create mode 100644 include/linux/mfd/mt6397/core.h create mode 100644 include/linux/mfd/mt6397/registers.h (limited to 'include/linux') diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h new file mode 100644 index 000000000000..cf5265b0d1c1 --- /dev/null +++ b/include/linux/mfd/mt6397/core.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014 MediaTek Inc. + * Author: Flora Fu, MediaTek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MFD_MT6397_CORE_H__ +#define __MFD_MT6397_CORE_H__ + +enum mt6397_irq_numbers { + MT6397_IRQ_SPKL_AB = 0, + MT6397_IRQ_SPKR_AB, + MT6397_IRQ_SPKL, + MT6397_IRQ_SPKR, + MT6397_IRQ_BAT_L, + MT6397_IRQ_BAT_H, + MT6397_IRQ_FG_BAT_L, + MT6397_IRQ_FG_BAT_H, + MT6397_IRQ_WATCHDOG, + MT6397_IRQ_PWRKEY, + MT6397_IRQ_THR_L, + MT6397_IRQ_THR_H, + MT6397_IRQ_VBATON_UNDET, + MT6397_IRQ_BVALID_DET, + MT6397_IRQ_CHRDET, + MT6397_IRQ_OV, + MT6397_IRQ_LDO, + MT6397_IRQ_HOMEKEY, + MT6397_IRQ_ACCDET, + MT6397_IRQ_AUDIO, + MT6397_IRQ_RTC, + MT6397_IRQ_PWRKEY_RSTB, + MT6397_IRQ_HDMI_SIFM, + MT6397_IRQ_HDMI_CEC, + MT6397_IRQ_VCA15, + MT6397_IRQ_VSRMCA15, + MT6397_IRQ_VCORE, + MT6397_IRQ_VGPU, + MT6397_IRQ_VIO18, + MT6397_IRQ_VPCA7, + MT6397_IRQ_VSRMCA7, + MT6397_IRQ_VDRM, + MT6397_IRQ_NR, +}; + +struct mt6397_chip { + struct device *dev; + struct regmap *regmap; + int irq; + struct irq_domain *irq_domain; + struct mutex irqlock; + u16 irq_masks_cur[2]; + u16 irq_masks_cache[2]; +}; + +#endif /* __MFD_MT6397_CORE_H__ */ diff --git a/include/linux/mfd/mt6397/registers.h b/include/linux/mfd/mt6397/registers.h new file mode 100644 index 000000000000..f23a0a60a877 --- /dev/null +++ b/include/linux/mfd/mt6397/registers.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2014 MediaTek Inc. + * Author: Flora Fu, MediaTek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MFD_MT6397_REGISTERS_H__ +#define __MFD_MT6397_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6397_CID 0x0100 +#define MT6397_TOP_CKPDN 0x0102 +#define MT6397_TOP_CKPDN_SET 0x0104 +#define MT6397_TOP_CKPDN_CLR 0x0106 +#define MT6397_TOP_CKPDN2 0x0108 +#define MT6397_TOP_CKPDN2_SET 0x010A +#define MT6397_TOP_CKPDN2_CLR 0x010C +#define MT6397_TOP_GPIO_CKPDN 0x010E +#define MT6397_TOP_RST_CON 0x0114 +#define MT6397_WRP_CKPDN 0x011A +#define MT6397_WRP_RST_CON 0x0120 +#define MT6397_TOP_RST_MISC 0x0126 +#define MT6397_TOP_CKCON1 0x0128 +#define MT6397_TOP_CKCON2 0x012A +#define MT6397_TOP_CKTST1 0x012C +#define MT6397_TOP_CKTST2 0x012E +#define MT6397_OC_DEG_EN 0x0130 +#define MT6397_OC_CTL0 0x0132 +#define MT6397_OC_CTL1 0x0134 +#define MT6397_OC_CTL2 0x0136 +#define MT6397_INT_RSV 0x0138 +#define MT6397_TEST_CON0 0x013A +#define MT6397_TEST_CON1 0x013C +#define MT6397_STATUS0 0x013E +#define MT6397_STATUS1 0x0140 +#define MT6397_PGSTATUS 0x0142 +#define MT6397_CHRSTATUS 0x0144 +#define MT6397_OCSTATUS0 0x0146 +#define MT6397_OCSTATUS1 0x0148 +#define MT6397_OCSTATUS2 0x014A +#define MT6397_HDMI_PAD_IE 0x014C +#define MT6397_TEST_OUT_L 0x014E +#define MT6397_TEST_OUT_H 0x0150 +#define MT6397_TDSEL_CON 0x0152 +#define MT6397_RDSEL_CON 0x0154 +#define MT6397_GPIO_SMT_CON0 0x0156 +#define MT6397_GPIO_SMT_CON1 0x0158 +#define MT6397_GPIO_SMT_CON2 0x015A +#define MT6397_GPIO_SMT_CON3 0x015C +#define MT6397_DRV_CON0 0x015E +#define MT6397_DRV_CON1 0x0160 +#define MT6397_DRV_CON2 0x0162 +#define MT6397_DRV_CON3 0x0164 +#define MT6397_DRV_CON4 0x0166 +#define MT6397_DRV_CON5 0x0168 +#define MT6397_DRV_CON6 0x016A +#define MT6397_DRV_CON7 0x016C +#define MT6397_DRV_CON8 0x016E +#define MT6397_DRV_CON9 0x0170 +#define MT6397_DRV_CON10 0x0172 +#define MT6397_DRV_CON11 0x0174 +#define MT6397_DRV_CON12 0x0176 +#define MT6397_INT_CON0 0x0178 +#define MT6397_INT_CON1 0x017E +#define MT6397_INT_STATUS0 0x0184 +#define MT6397_INT_STATUS1 0x0186 +#define MT6397_FQMTR_CON0 0x0188 +#define MT6397_FQMTR_CON1 0x018A +#define MT6397_FQMTR_CON2 0x018C +#define MT6397_EFUSE_DOUT_0_15 0x01C4 +#define MT6397_EFUSE_DOUT_16_31 0x01C6 +#define MT6397_EFUSE_DOUT_32_47 0x01C8 +#define MT6397_EFUSE_DOUT_48_63 0x01CA +#define MT6397_SPI_CON 0x01CC +#define MT6397_TOP_CKPDN3 0x01CE +#define MT6397_TOP_CKCON3 0x01D4 +#define MT6397_EFUSE_DOUT_64_79 0x01D6 +#define MT6397_EFUSE_DOUT_80_95 0x01D8 +#define MT6397_EFUSE_DOUT_96_111 0x01DA +#define MT6397_EFUSE_DOUT_112_127 0x01DC +#define MT6397_EFUSE_DOUT_128_143 0x01DE +#define MT6397_EFUSE_DOUT_144_159 0x01E0 +#define MT6397_EFUSE_DOUT_160_175 0x01E2 +#define MT6397_EFUSE_DOUT_176_191 0x01E4 +#define MT6397_EFUSE_DOUT_192_207 0x01E6 +#define MT6397_EFUSE_DOUT_208_223 0x01E8 +#define MT6397_EFUSE_DOUT_224_239 0x01EA +#define MT6397_EFUSE_DOUT_240_255 0x01EC +#define MT6397_EFUSE_DOUT_256_271 0x01EE +#define MT6397_EFUSE_DOUT_272_287 0x01F0 +#define MT6397_EFUSE_DOUT_288_300 0x01F2 +#define MT6397_EFUSE_DOUT_304_319 0x01F4 +#define MT6397_BUCK_CON0 0x0200 +#define MT6397_BUCK_CON1 0x0202 +#define MT6397_BUCK_CON2 0x0204 +#define MT6397_BUCK_CON3 0x0206 +#define MT6397_BUCK_CON4 0x0208 +#define MT6397_BUCK_CON5 0x020A +#define MT6397_BUCK_CON6 0x020C +#define MT6397_BUCK_CON7 0x020E +#define MT6397_BUCK_CON8 0x0210 +#define MT6397_BUCK_CON9 0x0212 +#define MT6397_VCA15_CON0 0x0214 +#define MT6397_VCA15_CON1 0x0216 +#define MT6397_VCA15_CON2 0x0218 +#define MT6397_VCA15_CON3 0x021A +#define MT6397_VCA15_CON4 0x021C +#define MT6397_VCA15_CON5 0x021E +#define MT6397_VCA15_CON6 0x0220 +#define MT6397_VCA15_CON7 0x0222 +#define MT6397_VCA15_CON8 0x0224 +#define MT6397_VCA15_CON9 0x0226 +#define MT6397_VCA15_CON10 0x0228 +#define MT6397_VCA15_CON11 0x022A +#define MT6397_VCA15_CON12 0x022C +#define MT6397_VCA15_CON13 0x022E +#define MT6397_VCA15_CON14 0x0230 +#define MT6397_VCA15_CON15 0x0232 +#define MT6397_VCA15_CON16 0x0234 +#define MT6397_VCA15_CON17 0x0236 +#define MT6397_VCA15_CON18 0x0238 +#define MT6397_VSRMCA15_CON0 0x023A +#define MT6397_VSRMCA15_CON1 0x023C +#define MT6397_VSRMCA15_CON2 0x023E +#define MT6397_VSRMCA15_CON3 0x0240 +#define MT6397_VSRMCA15_CON4 0x0242 +#define MT6397_VSRMCA15_CON5 0x0244 +#define MT6397_VSRMCA15_CON6 0x0246 +#define MT6397_VSRMCA15_CON7 0x0248 +#define MT6397_VSRMCA15_CON8 0x024A +#define MT6397_VSRMCA15_CON9 0x024C +#define MT6397_VSRMCA15_CON10 0x024E +#define MT6397_VSRMCA15_CON11 0x0250 +#define MT6397_VSRMCA15_CON12 0x0252 +#define MT6397_VSRMCA15_CON13 0x0254 +#define MT6397_VSRMCA15_CON14 0x0256 +#define MT6397_VSRMCA15_CON15 0x0258 +#define MT6397_VSRMCA15_CON16 0x025A +#define MT6397_VSRMCA15_CON17 0x025C +#define MT6397_VSRMCA15_CON18 0x025E +#define MT6397_VSRMCA15_CON19 0x0260 +#define MT6397_VSRMCA15_CON20 0x0262 +#define MT6397_VSRMCA15_CON21 0x0264 +#define MT6397_VCORE_CON0 0x0266 +#define MT6397_VCORE_CON1 0x0268 +#define MT6397_VCORE_CON2 0x026A +#define MT6397_VCORE_CON3 0x026C +#define MT6397_VCORE_CON4 0x026E +#define MT6397_VCORE_CON5 0x0270 +#define MT6397_VCORE_CON6 0x0272 +#define MT6397_VCORE_CON7 0x0274 +#define MT6397_VCORE_CON8 0x0276 +#define MT6397_VCORE_CON9 0x0278 +#define MT6397_VCORE_CON10 0x027A +#define MT6397_VCORE_CON11 0x027C +#define MT6397_VCORE_CON12 0x027E +#define MT6397_VCORE_CON13 0x0280 +#define MT6397_VCORE_CON14 0x0282 +#define MT6397_VCORE_CON15 0x0284 +#define MT6397_VCORE_CON16 0x0286 +#define MT6397_VCORE_CON17 0x0288 +#define MT6397_VCORE_CON18 0x028A +#define MT6397_VGPU_CON0 0x028C +#define MT6397_VGPU_CON1 0x028E +#define MT6397_VGPU_CON2 0x0290 +#define MT6397_VGPU_CON3 0x0292 +#define MT6397_VGPU_CON4 0x0294 +#define MT6397_VGPU_CON5 0x0296 +#define MT6397_VGPU_CON6 0x0298 +#define MT6397_VGPU_CON7 0x029A +#define MT6397_VGPU_CON8 0x029C +#define MT6397_VGPU_CON9 0x029E +#define MT6397_VGPU_CON10 0x02A0 +#define MT6397_VGPU_CON11 0x02A2 +#define MT6397_VGPU_CON12 0x02A4 +#define MT6397_VGPU_CON13 0x02A6 +#define MT6397_VGPU_CON14 0x02A8 +#define MT6397_VGPU_CON15 0x02AA +#define MT6397_VGPU_CON16 0x02AC +#define MT6397_VGPU_CON17 0x02AE +#define MT6397_VGPU_CON18 0x02B0 +#define MT6397_VIO18_CON0 0x0300 +#define MT6397_VIO18_CON1 0x0302 +#define MT6397_VIO18_CON2 0x0304 +#define MT6397_VIO18_CON3 0x0306 +#define MT6397_VIO18_CON4 0x0308 +#define MT6397_VIO18_CON5 0x030A +#define MT6397_VIO18_CON6 0x030C +#define MT6397_VIO18_CON7 0x030E +#define MT6397_VIO18_CON8 0x0310 +#define MT6397_VIO18_CON9 0x0312 +#define MT6397_VIO18_CON10 0x0314 +#define MT6397_VIO18_CON11 0x0316 +#define MT6397_VIO18_CON12 0x0318 +#define MT6397_VIO18_CON13 0x031A +#define MT6397_VIO18_CON14 0x031C +#define MT6397_VIO18_CON15 0x031E +#define MT6397_VIO18_CON16 0x0320 +#define MT6397_VIO18_CON17 0x0322 +#define MT6397_VIO18_CON18 0x0324 +#define MT6397_VPCA7_CON0 0x0326 +#define MT6397_VPCA7_CON1 0x0328 +#define MT6397_VPCA7_CON2 0x032A +#define MT6397_VPCA7_CON3 0x032C +#define MT6397_VPCA7_CON4 0x032E +#define MT6397_VPCA7_CON5 0x0330 +#define MT6397_VPCA7_CON6 0x0332 +#define MT6397_VPCA7_CON7 0x0334 +#define MT6397_VPCA7_CON8 0x0336 +#define MT6397_VPCA7_CON9 0x0338 +#define MT6397_VPCA7_CON10 0x033A +#define MT6397_VPCA7_CON11 0x033C +#define MT6397_VPCA7_CON12 0x033E +#define MT6397_VPCA7_CON13 0x0340 +#define MT6397_VPCA7_CON14 0x0342 +#define MT6397_VPCA7_CON15 0x0344 +#define MT6397_VPCA7_CON16 0x0346 +#define MT6397_VPCA7_CON17 0x0348 +#define MT6397_VPCA7_CON18 0x034A +#define MT6397_VSRMCA7_CON0 0x034C +#define MT6397_VSRMCA7_CON1 0x034E +#define MT6397_VSRMCA7_CON2 0x0350 +#define MT6397_VSRMCA7_CON3 0x0352 +#define MT6397_VSRMCA7_CON4 0x0354 +#define MT6397_VSRMCA7_CON5 0x0356 +#define MT6397_VSRMCA7_CON6 0x0358 +#define MT6397_VSRMCA7_CON7 0x035A +#define MT6397_VSRMCA7_CON8 0x035C +#define MT6397_VSRMCA7_CON9 0x035E +#define MT6397_VSRMCA7_CON10 0x0360 +#define MT6397_VSRMCA7_CON11 0x0362 +#define MT6397_VSRMCA7_CON12 0x0364 +#define MT6397_VSRMCA7_CON13 0x0366 +#define MT6397_VSRMCA7_CON14 0x0368 +#define MT6397_VSRMCA7_CON15 0x036A +#define MT6397_VSRMCA7_CON16 0x036C +#define MT6397_VSRMCA7_CON17 0x036E +#define MT6397_VSRMCA7_CON18 0x0370 +#define MT6397_VSRMCA7_CON19 0x0372 +#define MT6397_VSRMCA7_CON20 0x0374 +#define MT6397_VSRMCA7_CON21 0x0376 +#define MT6397_VDRM_CON0 0x0378 +#define MT6397_VDRM_CON1 0x037A +#define MT6397_VDRM_CON2 0x037C +#define MT6397_VDRM_CON3 0x037E +#define MT6397_VDRM_CON4 0x0380 +#define MT6397_VDRM_CON5 0x0382 +#define MT6397_VDRM_CON6 0x0384 +#define MT6397_VDRM_CON7 0x0386 +#define MT6397_VDRM_CON8 0x0388 +#define MT6397_VDRM_CON9 0x038A +#define MT6397_VDRM_CON10 0x038C +#define MT6397_VDRM_CON11 0x038E +#define MT6397_VDRM_CON12 0x0390 +#define MT6397_VDRM_CON13 0x0392 +#define MT6397_VDRM_CON14 0x0394 +#define MT6397_VDRM_CON15 0x0396 +#define MT6397_VDRM_CON16 0x0398 +#define MT6397_VDRM_CON17 0x039A +#define MT6397_VDRM_CON18 0x039C +#define MT6397_BUCK_K_CON0 0x039E +#define MT6397_BUCK_K_CON1 0x03A0 +#define MT6397_ANALDO_CON0 0x0400 +#define MT6397_ANALDO_CON1 0x0402 +#define MT6397_ANALDO_CON2 0x0404 +#define MT6397_ANALDO_CON3 0x0406 +#define MT6397_ANALDO_CON4 0x0408 +#define MT6397_ANALDO_CON5 0x040A +#define MT6397_ANALDO_CON6 0x040C +#define MT6397_ANALDO_CON7 0x040E +#define MT6397_DIGLDO_CON0 0x0410 +#define MT6397_DIGLDO_CON1 0x0412 +#define MT6397_DIGLDO_CON2 0x0414 +#define MT6397_DIGLDO_CON3 0x0416 +#define MT6397_DIGLDO_CON4 0x0418 +#define MT6397_DIGLDO_CON5 0x041A +#define MT6397_DIGLDO_CON6 0x041C +#define MT6397_DIGLDO_CON7 0x041E +#define MT6397_DIGLDO_CON8 0x0420 +#define MT6397_DIGLDO_CON9 0x0422 +#define MT6397_DIGLDO_CON10 0x0424 +#define MT6397_DIGLDO_CON11 0x0426 +#define MT6397_DIGLDO_CON12 0x0428 +#define MT6397_DIGLDO_CON13 0x042A +#define MT6397_DIGLDO_CON14 0x042C +#define MT6397_DIGLDO_CON15 0x042E +#define MT6397_DIGLDO_CON16 0x0430 +#define MT6397_DIGLDO_CON17 0x0432 +#define MT6397_DIGLDO_CON18 0x0434 +#define MT6397_DIGLDO_CON19 0x0436 +#define MT6397_DIGLDO_CON20 0x0438 +#define MT6397_DIGLDO_CON21 0x043A +#define MT6397_DIGLDO_CON22 0x043C +#define MT6397_DIGLDO_CON23 0x043E +#define MT6397_DIGLDO_CON24 0x0440 +#define MT6397_DIGLDO_CON25 0x0442 +#define MT6397_DIGLDO_CON26 0x0444 +#define MT6397_DIGLDO_CON27 0x0446 +#define MT6397_DIGLDO_CON28 0x0448 +#define MT6397_DIGLDO_CON29 0x044A +#define MT6397_DIGLDO_CON30 0x044C +#define MT6397_DIGLDO_CON31 0x044E +#define MT6397_DIGLDO_CON32 0x0450 +#define MT6397_DIGLDO_CON33 0x045A +#define MT6397_SPK_CON0 0x0600 +#define MT6397_SPK_CON1 0x0602 +#define MT6397_SPK_CON2 0x0604 +#define MT6397_SPK_CON3 0x0606 +#define MT6397_SPK_CON4 0x0608 +#define MT6397_SPK_CON5 0x060A +#define MT6397_SPK_CON6 0x060C +#define MT6397_SPK_CON7 0x060E +#define MT6397_SPK_CON8 0x0610 +#define MT6397_SPK_CON9 0x0612 +#define MT6397_SPK_CON10 0x0614 +#define MT6397_SPK_CON11 0x0616 +#define MT6397_AUDDAC_CON0 0x0700 +#define MT6397_AUDBUF_CFG0 0x0702 +#define MT6397_AUDBUF_CFG1 0x0704 +#define MT6397_AUDBUF_CFG2 0x0706 +#define MT6397_AUDBUF_CFG3 0x0708 +#define MT6397_AUDBUF_CFG4 0x070A +#define MT6397_IBIASDIST_CFG0 0x070C +#define MT6397_AUDACCDEPOP_CFG0 0x070E +#define MT6397_AUD_IV_CFG0 0x0710 +#define MT6397_AUDCLKGEN_CFG0 0x0712 +#define MT6397_AUDLDO_CFG0 0x0714 +#define MT6397_AUDLDO_CFG1 0x0716 +#define MT6397_AUDNVREGGLB_CFG0 0x0718 +#define MT6397_AUD_NCP0 0x071A +#define MT6397_AUDPREAMP_CON0 0x071C +#define MT6397_AUDADC_CON0 0x071E +#define MT6397_AUDADC_CON1 0x0720 +#define MT6397_AUDADC_CON2 0x0722 +#define MT6397_AUDADC_CON3 0x0724 +#define MT6397_AUDADC_CON4 0x0726 +#define MT6397_AUDADC_CON5 0x0728 +#define MT6397_AUDADC_CON6 0x072A +#define MT6397_AUDDIGMI_CON0 0x072C +#define MT6397_AUDLSBUF_CON0 0x072E +#define MT6397_AUDLSBUF_CON1 0x0730 +#define MT6397_AUDENCSPARE_CON0 0x0732 +#define MT6397_AUDENCCLKSQ_CON0 0x0734 +#define MT6397_AUDPREAMPGAIN_CON0 0x0736 +#define MT6397_ZCD_CON0 0x0738 +#define MT6397_ZCD_CON1 0x073A +#define MT6397_ZCD_CON2 0x073C +#define MT6397_ZCD_CON3 0x073E +#define MT6397_ZCD_CON4 0x0740 +#define MT6397_ZCD_CON5 0x0742 +#define MT6397_NCP_CLKDIV_CON0 0x0744 +#define MT6397_NCP_CLKDIV_CON1 0x0746 + +#endif /* __MFD_MT6397_REGISTERS_H__ */ -- cgit v1.2.3 From c7f585fe46d834d5837db7fbe205c46b94f81dc2 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Mon, 2 Mar 2015 19:10:34 +0900 Subject: mfd: max77843: Add max77843 MFD driver core driver This patch adds MAX77843 core/irq driver to support PMIC, MUIC(Micro USB Interface Controller), Charger, Fuel Gauge, LED and Haptic device. Signed-off-by: Jaewon Kim Signed-off-by: Beomho Seo Signed-off-by: Lee Jones --- include/linux/mfd/max77843-private.h | 454 +++++++++++++++++++++++++++++++++++ 1 file changed, 454 insertions(+) create mode 100644 include/linux/mfd/max77843-private.h (limited to 'include/linux') diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h new file mode 100644 index 000000000000..7178ace8379e --- /dev/null +++ b/include/linux/mfd/max77843-private.h @@ -0,0 +1,454 @@ +/* + * Common variables for the Maxim MAX77843 driver + * + * Copyright (C) 2015 Samsung Electronics + * Author: Jaewon Kim + * Author: Beomho Seo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __MAX77843_PRIVATE_H_ +#define __MAX77843_PRIVATE_H_ + +#include +#include + +#define I2C_ADDR_TOPSYS (0xCC >> 1) +#define I2C_ADDR_CHG (0xD2 >> 1) +#define I2C_ADDR_FG (0x6C >> 1) +#define I2C_ADDR_MUIC (0x4A >> 1) + +/* Topsys, Haptic and LED registers */ +enum max77843_sys_reg { + MAX77843_SYS_REG_PMICID = 0x00, + MAX77843_SYS_REG_PMICREV = 0x01, + MAX77843_SYS_REG_MAINCTRL1 = 0x02, + MAX77843_SYS_REG_INTSRC = 0x22, + MAX77843_SYS_REG_INTSRCMASK = 0x23, + MAX77843_SYS_REG_SYSINTSRC = 0x24, + MAX77843_SYS_REG_SYSINTMASK = 0x26, + MAX77843_SYS_REG_TOPSYS_STAT = 0x28, + MAX77843_SYS_REG_SAFEOUTCTRL = 0xC6, + + MAX77843_SYS_REG_END, +}; + +enum max77843_haptic_reg { + MAX77843_HAP_REG_MCONFIG = 0x10, + + MAX77843_HAP_REG_END, +}; + +enum max77843_led_reg { + MAX77843_LED_REG_LEDEN = 0x30, + MAX77843_LED_REG_LED0BRT = 0x31, + MAX77843_LED_REG_LED1BRT = 0x32, + MAX77843_LED_REG_LED2BRT = 0x33, + MAX77843_LED_REG_LED3BRT = 0x34, + MAX77843_LED_REG_LEDBLNK = 0x38, + MAX77843_LED_REG_LEDRAMP = 0x36, + + MAX77843_LED_REG_END, +}; + +/* Charger registers */ +enum max77843_charger_reg { + MAX77843_CHG_REG_CHG_INT = 0xB0, + MAX77843_CHG_REG_CHG_INT_MASK = 0xB1, + MAX77843_CHG_REG_CHG_INT_OK = 0xB2, + MAX77843_CHG_REG_CHG_DTLS_00 = 0xB3, + MAX77843_CHG_REG_CHG_DTLS_01 = 0xB4, + MAX77843_CHG_REG_CHG_DTLS_02 = 0xB5, + MAX77843_CHG_REG_CHG_CNFG_00 = 0xB7, + MAX77843_CHG_REG_CHG_CNFG_01 = 0xB8, + MAX77843_CHG_REG_CHG_CNFG_02 = 0xB9, + MAX77843_CHG_REG_CHG_CNFG_03 = 0xBA, + MAX77843_CHG_REG_CHG_CNFG_04 = 0xBB, + MAX77843_CHG_REG_CHG_CNFG_06 = 0xBD, + MAX77843_CHG_REG_CHG_CNFG_07 = 0xBE, + MAX77843_CHG_REG_CHG_CNFG_09 = 0xC0, + MAX77843_CHG_REG_CHG_CNFG_10 = 0xC1, + MAX77843_CHG_REG_CHG_CNFG_11 = 0xC2, + MAX77843_CHG_REG_CHG_CNFG_12 = 0xC3, + + MAX77843_CHG_REG_END, +}; + +/* Fuel gauge registers */ +enum max77843_fuelgauge { + MAX77843_FG_REG_STATUS = 0x00, + MAX77843_FG_REG_VALRT_TH = 0x01, + MAX77843_FG_REG_TALRT_TH = 0x02, + MAX77843_FG_REG_SALRT_TH = 0x03, + MAX77843_FG_RATE_AT_RATE = 0x04, + MAX77843_FG_REG_REMCAP_REP = 0x05, + MAX77843_FG_REG_SOCREP = 0x06, + MAX77843_FG_REG_AGE = 0x07, + MAX77843_FG_REG_TEMP = 0x08, + MAX77843_FG_REG_VCELL = 0x09, + MAX77843_FG_REG_CURRENT = 0x0A, + MAX77843_FG_REG_AVG_CURRENT = 0x0B, + MAX77843_FG_REG_SOCMIX = 0x0D, + MAX77843_FG_REG_SOCAV = 0x0E, + MAX77843_FG_REG_REMCAP_MIX = 0x0F, + MAX77843_FG_REG_FULLCAP = 0x10, + MAX77843_FG_REG_AVG_TEMP = 0x16, + MAX77843_FG_REG_CYCLES = 0x17, + MAX77843_FG_REG_AVG_VCELL = 0x19, + MAX77843_FG_REG_CONFIG = 0x1D, + MAX77843_FG_REG_REMCAP_AV = 0x1F, + MAX77843_FG_REG_FULLCAP_NOM = 0x23, + MAX77843_FG_REG_MISCCFG = 0x2B, + MAX77843_FG_REG_RCOMP = 0x38, + MAX77843_FG_REG_FSTAT = 0x3D, + MAX77843_FG_REG_DQACC = 0x45, + MAX77843_FG_REG_DPACC = 0x46, + MAX77843_FG_REG_OCV = 0xEE, + MAX77843_FG_REG_VFOCV = 0xFB, + MAX77843_FG_SOCVF = 0xFF, + + MAX77843_FG_END, +}; + +/* MUIC registers */ +enum max77843_muic_reg { + MAX77843_MUIC_REG_ID = 0x00, + MAX77843_MUIC_REG_INT1 = 0x01, + MAX77843_MUIC_REG_INT2 = 0x02, + MAX77843_MUIC_REG_INT3 = 0x03, + MAX77843_MUIC_REG_STATUS1 = 0x04, + MAX77843_MUIC_REG_STATUS2 = 0x05, + MAX77843_MUIC_REG_STATUS3 = 0x06, + MAX77843_MUIC_REG_INTMASK1 = 0x07, + MAX77843_MUIC_REG_INTMASK2 = 0x08, + MAX77843_MUIC_REG_INTMASK3 = 0x09, + MAX77843_MUIC_REG_CDETCTRL1 = 0x0A, + MAX77843_MUIC_REG_CDETCTRL2 = 0x0B, + MAX77843_MUIC_REG_CONTROL1 = 0x0C, + MAX77843_MUIC_REG_CONTROL2 = 0x0D, + MAX77843_MUIC_REG_CONTROL3 = 0x0E, + MAX77843_MUIC_REG_CONTROL4 = 0x16, + MAX77843_MUIC_REG_HVCONTROL1 = 0x17, + MAX77843_MUIC_REG_HVCONTROL2 = 0x18, + + MAX77843_MUIC_REG_END, +}; + +enum max77843_irq { + /* Topsys: SYSTEM */ + MAX77843_SYS_IRQ_SYSINTSRC_SYSUVLO_INT, + MAX77843_SYS_IRQ_SYSINTSRC_SYSOVLO_INT, + MAX77843_SYS_IRQ_SYSINTSRC_TSHDN_INT, + MAX77843_SYS_IRQ_SYSINTSRC_TM_INT, + + /* Charger: CHG_INT */ + MAX77843_CHG_IRQ_CHG_INT_BYP_I, + MAX77843_CHG_IRQ_CHG_INT_BATP_I, + MAX77843_CHG_IRQ_CHG_INT_BAT_I, + MAX77843_CHG_IRQ_CHG_INT_CHG_I, + MAX77843_CHG_IRQ_CHG_INT_WCIN_I, + MAX77843_CHG_IRQ_CHG_INT_CHGIN_I, + MAX77843_CHG_IRQ_CHG_INT_AICL_I, + + MAX77843_IRQ_NUM, +}; + +enum max77843_irq_muic { + /* MUIC: INT1 */ + MAX77843_MUIC_IRQ_INT1_ADC, + MAX77843_MUIC_IRQ_INT1_ADCERROR, + MAX77843_MUIC_IRQ_INT1_ADC1K, + + /* MUIC: INT2 */ + MAX77843_MUIC_IRQ_INT2_CHGTYP, + MAX77843_MUIC_IRQ_INT2_CHGDETRUN, + MAX77843_MUIC_IRQ_INT2_DCDTMR, + MAX77843_MUIC_IRQ_INT2_DXOVP, + MAX77843_MUIC_IRQ_INT2_VBVOLT, + + /* MUIC: INT3 */ + MAX77843_MUIC_IRQ_INT3_VBADC, + MAX77843_MUIC_IRQ_INT3_VDNMON, + MAX77843_MUIC_IRQ_INT3_DNRES, + MAX77843_MUIC_IRQ_INT3_MPNACK, + MAX77843_MUIC_IRQ_INT3_MRXBUFOW, + MAX77843_MUIC_IRQ_INT3_MRXTRF, + MAX77843_MUIC_IRQ_INT3_MRXPERR, + MAX77843_MUIC_IRQ_INT3_MRXRDY, + + MAX77843_MUIC_IRQ_NUM, +}; + +/* MAX77843 interrupts */ +#define MAX77843_SYS_IRQ_SYSUVLO_INT BIT(0) +#define MAX77843_SYS_IRQ_SYSOVLO_INT BIT(1) +#define MAX77843_SYS_IRQ_TSHDN_INT BIT(2) +#define MAX77843_SYS_IRQ_TM_INT BIT(3) + +/* MAX77843 MAINCTRL1 register */ +#define MAINCTRL1_BIASEN_SHIFT 7 +#define MAX77843_MAINCTRL1_BIASEN_MASK BIT(MAINCTRL1_BIASEN_SHIFT) + +/* MAX77843 MCONFIG register */ +#define MCONFIG_MODE_SHIFT 7 +#define MCONFIG_MEN_SHIFT 6 +#define MCONFIG_PDIV_SHIFT 0 + +#define MAX77843_MCONFIG_MODE_MASK BIT(MCONFIG_MODE_SHIFT) +#define MAX77843_MCONFIG_MEN_MASK BIT(MCONFIG_MEN_SHIFT) +#define MAX77843_MCONFIG_PDIV_MASK (0x3 << MCONFIG_PDIV_SHIFT) + +/* Max77843 charger insterrupts */ +#define MAX77843_CHG_BYP_I BIT(0) +#define MAX77843_CHG_BATP_I BIT(2) +#define MAX77843_CHG_BAT_I BIT(3) +#define MAX77843_CHG_CHG_I BIT(4) +#define MAX77843_CHG_WCIN_I BIT(5) +#define MAX77843_CHG_CHGIN_I BIT(6) +#define MAX77843_CHG_AICL_I BIT(7) + +/* MAX77843 CHG_INT_OK register */ +#define MAX77843_CHG_BYP_OK BIT(0) +#define MAX77843_CHG_BATP_OK BIT(2) +#define MAX77843_CHG_BAT_OK BIT(3) +#define MAX77843_CHG_CHG_OK BIT(4) +#define MAX77843_CHG_WCIN_OK BIT(5) +#define MAX77843_CHG_CHGIN_OK BIT(6) +#define MAX77843_CHG_AICL_OK BIT(7) + +/* MAX77843 CHG_DETAILS_00 register */ +#define MAX77843_CHG_BAT_DTLS BIT(0) + +/* MAX77843 CHG_DETAILS_01 register */ +#define MAX77843_CHG_DTLS_MASK 0x0f +#define MAX77843_CHG_PQ_MODE 0x00 +#define MAX77843_CHG_CC_MODE 0x01 +#define MAX77843_CHG_CV_MODE 0x02 +#define MAX77843_CHG_TO_MODE 0x03 +#define MAX77843_CHG_DO_MODE 0x04 +#define MAX77843_CHG_HT_MODE 0x05 +#define MAX77843_CHG_TF_MODE 0x06 +#define MAX77843_CHG_TS_MODE 0x07 +#define MAX77843_CHG_OFF_MODE 0x08 + +#define MAX77843_CHG_BAT_DTLS_MASK 0xf0 +#define MAX77843_CHG_NO_BAT (0x00 << 4) +#define MAX77843_CHG_LOW_VOLT_BAT (0x01 << 4) +#define MAX77843_CHG_LONG_BAT_TIME (0x02 << 4) +#define MAX77843_CHG_OK_BAT (0x03 << 4) +#define MAX77843_CHG_OK_LOW_VOLT_BAT (0x04 << 4) +#define MAX77843_CHG_OVER_VOLT_BAT (0x05 << 4) +#define MAX77843_CHG_OVER_CURRENT_BAT (0x06 << 4) + +/* MAX77843 CHG_CNFG_00 register */ +#define MAX77843_CHG_DISABLE 0x00 +#define MAX77843_CHG_ENABLE 0x05 +#define MAX77843_CHG_MASK 0x01 +#define MAX77843_CHG_BUCK_MASK 0x04 + +/* MAX77843 CHG_CNFG_01 register */ +#define MAX77843_CHG_RESTART_THRESHOLD_100 0x00 +#define MAX77843_CHG_RESTART_THRESHOLD_150 0x10 +#define MAX77843_CHG_RESTART_THRESHOLD_200 0x20 +#define MAX77843_CHG_RESTART_THRESHOLD_DISABLE 0x30 + +/* MAX77843 CHG_CNFG_02 register */ +#define MAX77843_CHG_FAST_CHG_CURRENT_MIN 100000 +#define MAX77843_CHG_FAST_CHG_CURRENT_MAX 3150000 +#define MAX77843_CHG_FAST_CHG_CURRENT_STEP 50000 +#define MAX77843_CHG_FAST_CHG_CURRENT_MASK 0x3f +#define MAX77843_CHG_OTG_ILIMIT_500 (0x00 << 6) +#define MAX77843_CHG_OTG_ILIMIT_900 (0x01 << 6) +#define MAX77843_CHG_OTG_ILIMIT_1200 (0x02 << 6) +#define MAX77843_CHG_OTG_ILIMIT_1500 (0x03 << 6) +#define MAX77843_CHG_OTG_ILIMIT_MASK 0xc0 + +/* MAX77843 CHG_CNFG_03 register */ +#define MAX77843_CHG_TOP_OFF_CURRENT_MIN 125000 +#define MAX77843_CHG_TOP_OFF_CURRENT_MAX 650000 +#define MAX77843_CHG_TOP_OFF_CURRENT_STEP 75000 +#define MAX77843_CHG_TOP_OFF_CURRENT_MASK 0x07 + +/* MAX77843 CHG_CNFG_06 register */ +#define MAX77843_CHG_WRITE_CAP_BLOCK 0x10 +#define MAX77843_CHG_WRITE_CAP_UNBLOCK 0x0C + +/* MAX77843_CHG_CNFG_09_register */ +#define MAX77843_CHG_INPUT_CURRENT_LIMIT_MIN 100000 +#define MAX77843_CHG_INPUT_CURRENT_LIMIT_MAX 4000000 +#define MAX77843_CHG_INPUT_CURRENT_LIMIT_REF 3367000 +#define MAX77843_CHG_INPUT_CURRENT_LIMIT_STEP 33000 + +#define MAX77843_MUIC_ADC BIT(0) +#define MAX77843_MUIC_ADCERROR BIT(2) +#define MAX77843_MUIC_ADC1K BIT(3) + +#define MAX77843_MUIC_CHGTYP BIT(0) +#define MAX77843_MUIC_CHGDETRUN BIT(1) +#define MAX77843_MUIC_DCDTMR BIT(2) +#define MAX77843_MUIC_DXOVP BIT(3) +#define MAX77843_MUIC_VBVOLT BIT(4) + +#define MAX77843_MUIC_VBADC BIT(0) +#define MAX77843_MUIC_VDNMON BIT(1) +#define MAX77843_MUIC_DNRES BIT(2) +#define MAX77843_MUIC_MPNACK BIT(3) +#define MAX77843_MUIC_MRXBUFOW BIT(4) +#define MAX77843_MUIC_MRXTRF BIT(5) +#define MAX77843_MUIC_MRXPERR BIT(6) +#define MAX77843_MUIC_MRXRDY BIT(7) + +/* MAX77843 INTSRCMASK register */ +#define MAX77843_INTSRCMASK_CHGR 0 +#define MAX77843_INTSRCMASK_SYS 1 +#define MAX77843_INTSRCMASK_FG 2 +#define MAX77843_INTSRCMASK_MUIC 3 + +#define MAX77843_INTSRCMASK_CHGR_MASK BIT(MAX77843_INTSRCMASK_CHGR) +#define MAX77843_INTSRCMASK_SYS_MASK BIT(MAX77843_INTSRCMASK_SYS) +#define MAX77843_INTSRCMASK_FG_MASK BIT(MAX77843_INTSRCMASK_FG) +#define MAX77843_INTSRCMASK_MUIC_MASK BIT(MAX77843_INTSRCMASK_MUIC) + +#define MAX77843_INTSRC_MASK_MASK \ + (MAX77843_INTSRCMASK_MUIC_MASK | MAX77843_INTSRCMASK_FG_MASK | \ + MAX77843_INTSRCMASK_SYS_MASK | MAX77843_INTSRCMASK_CHGR_MASK) + +/* MAX77843 STATUS register*/ +#define STATUS1_ADC_SHIFT 0 +#define STATUS1_ADCERROR_SHIFT 6 +#define STATUS1_ADC1K_SHIFT 7 +#define STATUS2_CHGTYP_SHIFT 0 +#define STATUS2_CHGDETRUN_SHIFT 3 +#define STATUS2_DCDTMR_SHIFT 4 +#define STATUS2_DXOVP_SHIFT 5 +#define STATUS2_VBVOLT_SHIFT 6 +#define STATUS3_VBADC_SHIFT 0 +#define STATUS3_VDNMON_SHIFT 4 +#define STATUS3_DNRES_SHIFT 5 +#define STATUS3_MPNACK_SHIFT 6 + +#define MAX77843_MUIC_STATUS1_ADC_MASK (0x1f << STATUS1_ADC_SHIFT) +#define MAX77843_MUIC_STATUS1_ADCERROR_MASK BIT(STATUS1_ADCERROR_SHIFT) +#define MAX77843_MUIC_STATUS1_ADC1K_MASK BIT(STATUS1_ADC1K_SHIFT) +#define MAX77843_MUIC_STATUS2_CHGTYP_MASK (0x7 << STATUS2_CHGTYP_SHIFT) +#define MAX77843_MUIC_STATUS2_CHGDETRUN_MASK BIT(STATUS2_CHGDETRUN_SHIFT) +#define MAX77843_MUIC_STATUS2_DCDTMR_MASK BIT(STATUS2_DCDTMR_SHIFT) +#define MAX77843_MUIC_STATUS2_DXOVP_MASK BIT(STATUS2_DXOVP_SHIFT) +#define MAX77843_MUIC_STATUS2_VBVOLT_MASK BIT(STATUS2_VBVOLT_SHIFT) +#define MAX77843_MUIC_STATUS3_VBADC_MASK (0xf << STATUS3_VBADC_SHIFT) +#define MAX77843_MUIC_STATUS3_VDNMON_MASK BIT(STATUS3_VDNMON_SHIFT) +#define MAX77843_MUIC_STATUS3_DNRES_MASK BIT(STATUS3_DNRES_SHIFT) +#define MAX77843_MUIC_STATUS3_MPNACK_MASK BIT(STATUS3_MPNACK_SHIFT) + +/* MAX77843 CONTROL register */ +#define CONTROL1_COMP1SW_SHIFT 0 +#define CONTROL1_COMP2SW_SHIFT 3 +#define CONTROL1_IDBEN_SHIFT 7 +#define CONTROL2_LOWPWR_SHIFT 0 +#define CONTROL2_ADCEN_SHIFT 1 +#define CONTROL2_CPEN_SHIFT 2 +#define CONTROL2_ACC_DET_SHIFT 5 +#define CONTROL2_USBCPINT_SHIFT 6 +#define CONTROL2_RCPS_SHIFT 7 +#define CONTROL3_JIGSET_SHIFT 0 +#define CONTROL4_ADCDBSET_SHIFT 0 +#define CONTROL4_USBAUTO_SHIFT 4 +#define CONTROL4_FCTAUTO_SHIFT 5 +#define CONTROL4_ADCMODE_SHIFT 6 + +#define MAX77843_MUIC_CONTROL1_COMP1SW_MASK (0x7 << CONTROL1_COMP1SW_SHIFT) +#define MAX77843_MUIC_CONTROL1_COMP2SW_MASK (0x7 << CONTROL1_COMP2SW_SHIFT) +#define MAX77843_MUIC_CONTROL1_IDBEN_MASK BIT(CONTROL1_IDBEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_LOWPWR_MASK BIT(CONTROL2_LOWPWR_SHIFT) +#define MAX77843_MUIC_CONTROL2_ADCEN_MASK BIT(CONTROL2_ADCEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_CPEN_MASK BIT(CONTROL2_CPEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_ACC_DET_MASK BIT(CONTROL2_ACC_DET_SHIFT) +#define MAX77843_MUIC_CONTROL2_USBCPINT_MASK BIT(CONTROL2_USBCPINT_SHIFT) +#define MAX77843_MUIC_CONTROL2_RCPS_MASK BIT(CONTROL2_RCPS_SHIFT) +#define MAX77843_MUIC_CONTROL3_JIGSET_MASK (0x3 << CONTROL3_JIGSET_SHIFT) +#define MAX77843_MUIC_CONTROL4_ADCDBSET_MASK (0x3 << CONTROL4_ADCDBSET_SHIFT) +#define MAX77843_MUIC_CONTROL4_USBAUTO_MASK BIT(CONTROL4_USBAUTO_SHIFT) +#define MAX77843_MUIC_CONTROL4_FCTAUTO_MASK BIT(CONTROL4_FCTAUTO_SHIFT) +#define MAX77843_MUIC_CONTROL4_ADCMODE_MASK (0x3 << CONTROL4_ADCMODE_SHIFT) + +/* MAX77843 switch port */ +#define COM_OPEN 0 +#define COM_USB 1 +#define COM_AUDIO 2 +#define COM_UART 3 +#define COM_AUX_USB 4 +#define COM_AUX_UART 5 + +#define CONTROL1_COM_SW \ + ((MAX77843_MUIC_CONTROL1_COMP1SW_MASK | \ + MAX77843_MUIC_CONTROL1_COMP2SW_MASK)) + +#define CONTROL1_SW_OPEN \ + ((COM_OPEN << CONTROL1_COMP1SW_SHIFT | \ + COM_OPEN << CONTROL1_COMP2SW_SHIFT)) +#define CONTROL1_SW_USB \ + ((COM_USB << CONTROL1_COMP1SW_SHIFT | \ + COM_USB << CONTROL1_COMP2SW_SHIFT)) +#define CONTROL1_SW_AUDIO \ + ((COM_AUDIO << CONTROL1_COMP1SW_SHIFT | \ + COM_AUDIO << CONTROL1_COMP2SW_SHIFT)) +#define CONTROL1_SW_UART \ + ((COM_UART << CONTROL1_COMP1SW_SHIFT | \ + COM_UART << CONTROL1_COMP2SW_SHIFT)) +#define CONTROL1_SW_AUX_USB \ + ((COM_AUX_USB << CONTROL1_COMP1SW_SHIFT | \ + COM_AUX_USB << CONTROL1_COMP2SW_SHIFT)) +#define CONTROL1_SW_AUX_UART \ + ((COM_AUX_UART << CONTROL1_COMP1SW_SHIFT | \ + COM_AUX_UART << CONTROL1_COMP2SW_SHIFT)) + +#define MAX77843_DISABLE 0 +#define MAX77843_ENABLE 1 + +#define CONTROL4_AUTO_DISABLE \ + ((MAX77843_DISABLE << CONTROL4_USBAUTO_SHIFT) | \ + (MAX77843_DISABLE << CONTROL4_FCTAUTO_SHIFT)) +#define CONTROL4_AUTO_ENABLE \ + ((MAX77843_ENABLE << CONTROL4_USBAUTO_SHIFT) | \ + (MAX77843_ENABLE << CONTROL4_FCTAUTO_SHIFT)) + +/* MAX77843 SAFEOUT LDO Control register */ +#define SAFEOUTCTRL_SAFEOUT1_SHIFT 0 +#define SAFEOUTCTRL_SAFEOUT2_SHIFT 2 +#define SAFEOUTCTRL_ENSAFEOUT1_SHIFT 6 +#define SAFEOUTCTRL_ENSAFEOUT2_SHIFT 7 + +#define MAX77843_REG_SAFEOUTCTRL_ENSAFEOUT1 \ + BIT(SAFEOUTCTRL_ENSAFEOUT1_SHIFT) +#define MAX77843_REG_SAFEOUTCTRL_ENSAFEOUT2 \ + BIT(SAFEOUTCTRL_ENSAFEOUT2_SHIFT) +#define MAX77843_REG_SAFEOUTCTRL_SAFEOUT1_MASK \ + (0x3 << SAFEOUTCTRL_SAFEOUT1_SHIFT) +#define MAX77843_REG_SAFEOUTCTRL_SAFEOUT2_MASK \ + (0x3 << SAFEOUTCTRL_SAFEOUT2_SHIFT) + +struct max77843 { + struct device *dev; + + struct i2c_client *i2c; + struct i2c_client *i2c_chg; + struct i2c_client *i2c_fuel; + struct i2c_client *i2c_muic; + + struct regmap *regmap; + struct regmap *regmap_chg; + struct regmap *regmap_fuel; + struct regmap *regmap_muic; + + struct regmap_irq_chip_data *irq_data; + struct regmap_irq_chip_data *irq_data_chg; + struct regmap_irq_chip_data *irq_data_fuel; + struct regmap_irq_chip_data *irq_data_muic; + + int irq; +}; +#endif /* __MAX77843_H__ */ -- cgit v1.2.3 From e921eea8e7d4457f424bc3f821cb836e35b91f88 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 3 Mar 2015 12:05:00 +0100 Subject: dmaengine: Remove memset leftovers Commit 48a9db462d99 ("drivers/dma: remove unused support for MEMSET operations") removed support for the memset operation in dmaengine, but left the fill_aligned field that was supposed to set the buffer alignment for the memset operations. Remove that field too. Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index b6997a0cb528..db0104b0da4d 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -574,7 +574,6 @@ struct dma_tx_state { * @copy_align: alignment shift for memcpy operations * @xor_align: alignment shift for xor operations * @pq_align: alignment shift for pq operations - * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @src_addr_widths: bit mask of src addr widths the device supports @@ -625,7 +624,6 @@ struct dma_device { u8 copy_align; u8 xor_align; u8 pq_align; - u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -826,12 +824,6 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, return dmaengine_check_align(dev->pq_align, off1, off2, len); } -static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, - size_t off2, size_t len) -{ - return dmaengine_check_align(dev->fill_align, off1, off2, len); -} - static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3 From 1feb57a245a4910b03202a814ffc51a900bd4aca Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Wed, 21 Jan 2015 22:33:46 +0100 Subject: gpio: add parameter to allow the use named gpios The gpio binding document says that new code should always use named gpios. Patch 40b73183 added support to parse a list of gpios from child nodes, but does not make it possible to use named gpios. This patch adds the con_id property and implements it is done in gpiolib.c, where the old-style of using unnamed gpios still works. Signed-off-by: Olliver Schinagl Acked-by: Bryan Wu Acked-by: Dmitry Torokhov Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45afc2dee560..ed20759229eb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -110,6 +110,7 @@ struct fwnode_handle; struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname); struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, + const char *con_id, struct fwnode_handle *child); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 0a240339a8deeb13a19043389bba4285a6c0592e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Nov 2014 00:42:09 +0100 Subject: quota: Make VFS quotas use new interface for getting quota info Create new internal interface for getting information about quota which contains everything needed for both VFS quotas and XFS quotas. Make VFS use this and hook it up to Q_GETINFO. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 33 ++++++++++++++++++++++++++++++++- include/linux/quotaops.h | 2 +- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index d534e8ed308a..6ecac0f3b2ca 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -366,6 +366,37 @@ struct qc_dqblk { #define QC_RT_SPACE (1<<14) #define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) +#define QCI_SYSFILE (1 << 0) /* Quota file is hidden from userspace */ +#define QCI_ROOT_SQUASH (1 << 1) /* Root squash turned on */ +#define QCI_ACCT_ENABLED (1 << 2) /* Quota accounting enabled */ +#define QCI_LIMITS_ENFORCED (1 << 3) /* Quota limits enforced */ + +/* Structures for communicating via ->get_state */ +struct qc_type_state { + unsigned int flags; /* Flags QCI_* */ + unsigned int spc_timelimit; /* Time after which space softlimit is + * enforced */ + unsigned int ino_timelimit; /* Ditto for inode softlimit */ + unsigned int rt_spc_timelimit; /* Ditto for real-time space */ + unsigned int spc_warnlimit; /* Limit for number of space warnings */ + unsigned int ino_warnlimit; /* Ditto for inodes */ + unsigned int rt_spc_warnlimit; /* Ditto for real-time space */ + unsigned long long ino; /* Inode number of quota file */ + blkcnt_t blocks; /* Number of 512-byte blocks in the file */ + blkcnt_t nextents; /* Number of extents in the file */ +}; + +struct qc_state { + unsigned int s_incoredqs; /* Number of dquots in core */ + /* + * Per quota type information. The array should really have + * max(MAXQUOTAS, XQM_MAXQUOTAS) entries. BUILD_BUG_ON in + * quota_getinfo() makes sure XQM_MAXQUOTAS is large enough. Once VFS + * supports project quotas, this can be changed to MAXQUOTAS + */ + struct qc_type_state s_state[XQM_MAXQUOTAS]; +}; + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -373,10 +404,10 @@ struct quotactl_ops { int (*quota_enable)(struct super_block *, unsigned int); int (*quota_disable)(struct super_block *, unsigned int); int (*quota_sync)(struct super_block *, int); - int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*get_state)(struct super_block *, struct qc_state *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); int (*rm_xquota)(struct super_block *, unsigned int); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index df73258cca47..6509a29523e2 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -95,7 +95,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int dquot_quota_off(struct super_block *sb, int type); int dquot_writeback_dquots(struct super_block *sb, int type); int dquot_quota_sync(struct super_block *sb, int type); -int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int dquot_get_state(struct super_block *sb, struct qc_state *state); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); -- cgit v1.2.3 From 59b6ba699043e0f55d4057cf2ae79d9c1171bc58 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Nov 2014 16:44:58 +0100 Subject: quota: Remove ->get_xstate and ->get_xstatev callbacks These callbacks are now unused. Remove them. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/quota.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 6ecac0f3b2ca..a07f2ed25284 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -408,8 +408,6 @@ struct quotactl_ops { int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_state)(struct super_block *, struct qc_state *); - int (*get_xstate)(struct super_block *, struct fs_quota_stat *); - int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); int (*rm_xquota)(struct super_block *, unsigned int); }; -- cgit v1.2.3 From 5eacb2ac029161d94969a511e0adf7dca28cda1f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 16 Dec 2014 12:03:51 +0100 Subject: quota: Make ->set_info use structure with neccesary info to VFS and XFS Change ->set_info to take new qc_info structure which contains all the necessary information both for XFS and VFS. Convert Q_SETINFO handler to use this structure. Signed-off-by: Jan Kara --- include/linux/quota.h | 21 +++++++++++++++++++-- include/linux/quotaops.h | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index a07f2ed25284..3d521199a0bd 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -344,7 +344,10 @@ struct qc_dqblk { int d_rt_spc_warns; /* # warnings issued wrt RT space */ }; -/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +/* + * Field specifiers for ->set_dqblk() in struct qc_dqblk and also for + * ->set_info() in struct qc_info + */ #define QC_INO_SOFT (1<<0) #define QC_INO_HARD (1<<1) #define QC_SPC_SOFT (1<<2) @@ -365,6 +368,7 @@ struct qc_dqblk { #define QC_INO_COUNT (1<<13) #define QC_RT_SPACE (1<<14) #define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) +#define QC_FLAGS (1<<15) #define QCI_SYSFILE (1 << 0) /* Quota file is hidden from userspace */ #define QCI_ROOT_SQUASH (1 << 1) /* Root squash turned on */ @@ -397,6 +401,19 @@ struct qc_state { struct qc_type_state s_state[XQM_MAXQUOTAS]; }; +/* Structure for communicating via ->set_info */ +struct qc_info { + int i_fieldmask; /* mask of fields to change in ->set_info() */ + unsigned int i_flags; /* Flags QCI_* */ + unsigned int i_spc_timelimit; /* Time after which space softlimit is + * enforced */ + unsigned int i_ino_timelimit; /* Ditto for inode softlimit */ + unsigned int i_rt_spc_timelimit;/* Ditto for real-time space */ + unsigned int i_spc_warnlimit; /* Limit for number of space warnings */ + unsigned int i_ino_warnlimit; /* Limit for number of inode warnings */ + unsigned int i_rt_spc_warnlimit; /* Ditto for real-time space */ +}; + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -404,7 +421,7 @@ struct quotactl_ops { int (*quota_enable)(struct super_block *, unsigned int); int (*quota_disable)(struct super_block *, unsigned int); int (*quota_sync)(struct super_block *, int); - int (*set_info)(struct super_block *, int, struct if_dqinfo *); + int (*set_info)(struct super_block *, int, struct qc_info *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_state)(struct super_block *, struct qc_state *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 6509a29523e2..9f4b07ba9e8c 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -96,7 +96,7 @@ int dquot_quota_off(struct super_block *sb, int type); int dquot_writeback_dquots(struct super_block *sb, int type); int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_state(struct super_block *sb, struct qc_state *state); -int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, -- cgit v1.2.3 From 781970240a56d1c15a9b8ee37d28987b8182f060 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 12 Feb 2015 19:08:16 +0300 Subject: quota: reorder flags in quota state Flags in struct quota_state keep flags for each quota type and some common flags. This patch reorders typed flags: Before: 0 USRQUOTA DQUOT_USAGE_ENABLED 1 USRQUOTA DQUOT_LIMITS_ENABLED 2 USRQUOTA DQUOT_SUSPENDED 3 GRPQUOTA DQUOT_USAGE_ENABLED 4 GRPQUOTA DQUOT_LIMITS_ENABLED 5 GRPQUOTA DQUOT_SUSPENDED 6 DQUOT_QUOTA_SYS_FILE 7 DQUOT_NEGATIVE_USAGE After: 0 USRQUOTA DQUOT_USAGE_ENABLED 1 GRPQUOTA DQUOT_USAGE_ENABLED 2 USRQUOTA DQUOT_LIMITS_ENABLED 3 GRPQUOTA DQUOT_LIMITS_ENABLED 4 USRQUOTA DQUOT_SUSPENDED 5 GRPQUOTA DQUOT_SUSPENDED 6 DQUOT_QUOTA_SYS_FILE 7 DQUOT_NEGATIVE_USAGE Now we can get bitmap of all enabled/suspended quota types without loop. For example suspended: (flags / DQUOT_SUSPENDED) & ((1 << MAXQUOTAS) - 1). add/remove: 0/1 grow/shrink: 3/11 up/down: 56/-215 (-159) function old new delta __dquot_initialize 423 447 +24 dquot_transfer 181 197 +16 dquot_alloc_inode 286 302 +16 dquot_reclaim_space_nodirty 316 313 -3 dquot_claim_space_nodirty 314 311 -3 dquot_resume 286 281 -5 dquot_free_inode 332 324 -8 __dquot_alloc_space 500 492 -8 dquot_disable 1944 1929 -15 dquot_quota_enable 252 236 -16 __dquot_free_space 750 734 -16 dquot_writeback_dquots 625 608 -17 __dquot_transfer 1186 1154 -32 dquot_quota_sync 299 261 -38 dquot_active.isra 54 - -54 Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jan Kara --- include/linux/quota.h | 32 +++++++++++++++++++++++++------- include/linux/quotaops.h | 10 ++-------- 2 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index d534e8ed308a..a3374dc3a91b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -389,7 +389,19 @@ struct quota_format_type { struct quota_format_type *qf_next; }; -/* Quota state flags - they actually come in two flavors - for users and groups */ +/** + * Quota state flags - they actually come in two flavors - for users and groups. + * + * Actual typed flags layout: + * USRQUOTA GRPQUOTA + * DQUOT_USAGE_ENABLED 0x0001 0x0002 + * DQUOT_LIMITS_ENABLED 0x0004 0x0008 + * DQUOT_SUSPENDED 0x0010 0x0020 + * + * Following bits are used for non-typed flags: + * DQUOT_QUOTA_SYS_FILE 0x0040 + * DQUOT_NEGATIVE_USAGE 0x0080 + */ enum { _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */ @@ -398,9 +410,9 @@ enum { * memory to turn them on */ _DQUOT_STATE_FLAGS }; -#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED) -#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED) -#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED) +#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED * MAXQUOTAS) +#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED * MAXQUOTAS) +#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED * MAXQUOTAS) #define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ DQUOT_SUSPENDED) /* Other quota flags */ @@ -414,15 +426,21 @@ enum { */ #define DQUOT_NEGATIVE_USAGE (1 << (DQUOT_STATE_LAST + 1)) /* Allow negative quota usage */ - static inline unsigned int dquot_state_flag(unsigned int flags, int type) { - return flags << _DQUOT_STATE_FLAGS * type; + return flags << type; } static inline unsigned int dquot_generic_flag(unsigned int flags, int type) { - return (flags >> _DQUOT_STATE_FLAGS * type) & DQUOT_STATE_FLAGS; + return (flags >> type) & DQUOT_STATE_FLAGS; +} + +/* Bitmap of quota types where flag is set in flags */ +static __always_inline unsigned dquot_state_types(unsigned flags, unsigned flag) +{ + BUILD_BUG_ON_NOT_POWER_OF_2(flag); + return (flags / flag) & ((1 << MAXQUOTAS) - 1); } #ifdef CONFIG_QUOTA_NETLINK_INTERFACE diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index df73258cca47..8778ec4775eb 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -134,10 +134,7 @@ static inline bool sb_has_quota_suspended(struct super_block *sb, int type) static inline unsigned sb_any_quota_suspended(struct super_block *sb) { - unsigned type, tmsk = 0; - for (type = 0; type < MAXQUOTAS; type++) - tmsk |= sb_has_quota_suspended(sb, type) << type; - return tmsk; + return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_SUSPENDED); } /* Does kernel know about any quota information for given sb + type? */ @@ -149,10 +146,7 @@ static inline bool sb_has_quota_loaded(struct super_block *sb, int type) static inline unsigned sb_any_quota_loaded(struct super_block *sb) { - unsigned type, tmsk = 0; - for (type = 0; type < MAXQUOTAS; type++) - tmsk |= sb_has_quota_loaded(sb, type) << type; - return tmsk; + return dquot_state_types(sb_dqopt(sb)->flags, DQUOT_USAGE_ENABLED); } static inline bool sb_has_quota_active(struct super_block *sb, int type) -- cgit v1.2.3 From f3dddf2432e3123ef34b470129295641f7513d26 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 4 Mar 2015 14:10:26 -0800 Subject: HID: map telephony usage page Currently HID code maps usages from telephony page into BTN_0, BTN_1, etc keys which get interpreted by mousedev and userspace as left/right/middle button clicks, which is not really helpful. This change adds mappings for usages that have corresponding input event definitions, and leaves the rest unmapped. This can be changed when there are userspace consumers for more telephony usages. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index efc7787a41a8..69f9cf7f078d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -159,6 +159,7 @@ struct hid_item { #define HID_UP_LED 0x00080000 #define HID_UP_BUTTON 0x00090000 #define HID_UP_ORDINAL 0x000a0000 +#define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 #define HID_UP_PID 0x000f0000 -- cgit v1.2.3 From 9941a383df98193aa9a9b3662af7c1fcbc3070ee Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Mar 2015 10:49:19 +1030 Subject: CPU_MASK_ALL/CPU_MASK_NONE: remove from deprecated region. They're used to initialize various static fields, though static cpumasks should generally be avoided. Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 086549a665e2..dc037ae6f4f2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -768,7 +768,7 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) #if NR_CPUS <= BITS_PER_LONG #define CPU_BITS_ALL \ { \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ + [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #else /* NR_CPUS > BITS_PER_LONG */ @@ -776,7 +776,7 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) #define CPU_BITS_ALL \ { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ + [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } #endif /* NR_CPUS > BITS_PER_LONG */ @@ -797,38 +797,31 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) nr_cpu_ids); } -/* - * - * From here down, all obsolete. Use cpumask_ variants! - * - */ -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) - -#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) - #if NR_CPUS <= BITS_PER_LONG - #define CPU_MASK_ALL \ (cpumask_t) { { \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ + [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } - #else - #define CPU_MASK_ALL \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ + [BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS) \ } } - -#endif +#endif /* NR_CPUS > BITS_PER_LONG */ #define CPU_MASK_NONE \ (cpumask_t) { { \ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } +/* + * + * From here down, all obsolete. Use cpumask_ variants! + * + */ +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS + #define CPU_MASK_CPU0 \ (cpumask_t) { { \ [0] = 1UL \ -- cgit v1.2.3 From bfb33bad83f650f265ed65cbfe8352b7c3ce8c76 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 4 Mar 2015 17:24:13 -0800 Subject: init.h: Add early_param_on_off() At times all you need is a kconfig variable to enable a feature, by default but you may want to also enable / disable it through a kernel parameter. In such cases the parameter routines to turn the thing on / off are really simple. Just use a wrapper for this, it lets us generalize the code and makes it easier to associate parameters with related kernel configuration options. Signed-off-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Link: http://lkml.kernel.org/r/1425518654-3403-4-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- include/linux/init.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 2df8e8dd10a4..bc11ff96f336 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -268,6 +268,21 @@ struct obs_kernel_param { #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) +#define early_param_on_off(str_on, str_off, var, config) \ + int var = IS_ENABLED(config); \ + static int __init parse_##var##_on(char *arg) \ + { \ + var = 1; \ + return 0; \ + } \ + static int __init parse_##var##_off(char *arg) \ + { \ + var = 0; \ + return 0; \ + } \ + __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \ + __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1) + /* Relies on boot_command_line being set */ void __init parse_early_param(void); void __init parse_early_options(char *cmdline); -- cgit v1.2.3 From c281b94570ab711fdf21e81bdfb3d79764492bcf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2015 08:28:48 +0100 Subject: init.h: Clean up the __setup()/early_param() macros Make it all a bit easier on the eyes: - Move the __setup_param() lines right after their init functions - Use consistent vertical spacing - Use more horizontal spacing to make it look like regular C code - Use standard comment style Cc: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Signed-off-by: Ingo Molnar --- include/linux/init.h | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index bc11ff96f336..21b6d768edd7 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -253,34 +253,39 @@ struct obs_kernel_param { * obs_kernel_param "array" too far apart in .init.setup. */ #define __setup_param(str, unique_id, fn, early) \ - static const char __setup_str_##unique_id[] __initconst \ - __aligned(1) = str; \ - static struct obs_kernel_param __setup_##unique_id \ - __used __section(.init.setup) \ - __attribute__((aligned((sizeof(long))))) \ + static const char __setup_str_##unique_id[] __initconst \ + __aligned(1) = str; \ + static struct obs_kernel_param __setup_##unique_id \ + __used __section(.init.setup) \ + __attribute__((aligned((sizeof(long))))) \ = { __setup_str_##unique_id, fn, early } -#define __setup(str, fn) \ +#define __setup(str, fn) \ __setup_param(str, fn, fn, 0) -/* NOTE: fn is as per module_param, not __setup! Emits warning if fn - * returns non-zero. */ -#define early_param(str, fn) \ +/* + * NOTE: fn is as per module_param, not __setup! + * Emits warning if fn returns non-zero. + */ +#define early_param(str, fn) \ __setup_param(str, fn, fn, 1) -#define early_param_on_off(str_on, str_off, var, config) \ - int var = IS_ENABLED(config); \ - static int __init parse_##var##_on(char *arg) \ - { \ - var = 1; \ - return 0; \ - } \ - static int __init parse_##var##_off(char *arg) \ - { \ - var = 0; \ - return 0; \ - } \ - __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \ +#define early_param_on_off(str_on, str_off, var, config) \ + \ + int var = IS_ENABLED(config); \ + \ + static int __init parse_##var##_on(char *arg) \ + { \ + var = 1; \ + return 0; \ + } \ + __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \ + \ + static int __init parse_##var##_off(char *arg) \ + { \ + var = 0; \ + return 0; \ + } \ __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1) /* Relies on boot_command_line being set */ -- cgit v1.2.3 From 668585273246f67b0cdafa30dd2da2047a2e1290 Mon Sep 17 00:00:00 2001 From: Rojhalat Ibrahim Date: Wed, 11 Feb 2015 17:27:58 +0100 Subject: gpiolib: add gpiod_get_array and gpiod_put_array functions Introduce new functions for conveniently obtaining and disposing of an entire array of GPIOs with one function call. ACPI parts tested by Mika Westerberg, DT parts tested by Rojhalat Ibrahim. Change log: v5: move the ACPI functions to gpiolib-acpi.c v4: - use shorter names for members of struct gpio_descs - rename lut_gpio_count to platform_gpio_count for clarity - add check for successful memory allocation - use ERR_CAST() v3: - rebase on current linux-gpio devel branch - fix ACPI GPIO counting - allow for zero-sized arrays - make the flags argument mandatory for the new functions - clarify documentation v2: change interface Suggested-by: Alexandre Courbot Signed-off-by: Rojhalat Ibrahim Reviewed-by: Alexandre Courbot Reviewed-by: Mika Westerberg Tested-by: Mika Westerberg Tested-by: Rojhalat Ibrahim Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ed20759229eb..33eb52fd0932 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -16,6 +16,15 @@ struct device; */ struct gpio_desc; +/** + * Struct containing an array of descriptors that can be obtained using + * gpiod_get_array(). + */ +struct gpio_descs { + unsigned int ndescs; + struct gpio_desc *desc[]; +}; + #define GPIOD_FLAGS_BIT_DIR_SET BIT(0) #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) @@ -34,6 +43,9 @@ enum gpiod_flags { #ifdef CONFIG_GPIOLIB +/* Return the number of GPIOs associated with a device / function */ +int gpiod_count(struct device *dev, const char *con_id); + /* Acquire and dispose GPIOs */ struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, @@ -49,7 +61,14 @@ struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); +struct gpio_descs *__must_check gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, + const char *con_id, + enum gpiod_flags flags); void gpiod_put(struct gpio_desc *desc); +void gpiod_put_array(struct gpio_descs *descs); struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, const char *con_id, @@ -114,6 +133,11 @@ struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, struct fwnode_handle *child); #else /* CONFIG_GPIOLIB */ +static inline int gpiod_count(struct device *dev, const char *con_id) +{ + return 0; +} + static inline struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) @@ -143,6 +167,20 @@ __gpiod_get_index_optional(struct device *dev, const char *con_id, return ERR_PTR(-ENOSYS); } +static inline struct gpio_descs *__must_check +gpiod_get_array(struct device *dev, const char *con_id, + enum gpiod_flags flags) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct gpio_descs *__must_check +gpiod_get_array_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags) +{ + return ERR_PTR(-ENOSYS); +} + static inline void gpiod_put(struct gpio_desc *desc) { might_sleep(); @@ -151,6 +189,14 @@ static inline void gpiod_put(struct gpio_desc *desc) WARN_ON(1); } +static inline void gpiod_put_array(struct gpio_descs *descs) +{ + might_sleep(); + + /* GPIO can never have been requested */ + WARN_ON(1); +} + static inline struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, const char *con_id, -- cgit v1.2.3 From 331758eef83620eef3f21289f0f44aba094d0503 Mon Sep 17 00:00:00 2001 From: Rojhalat Ibrahim Date: Wed, 11 Feb 2015 17:28:02 +0100 Subject: gpiolib: add devm_gpiod_get_array and devm_gpiod_put_array functions Add device managed variants of gpiod_get_array() / gpiod_put_array() functions for conveniently obtaining and disposing of an entire array of GPIOs with one function call. Signed-off-by: Rojhalat Ibrahim Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 33eb52fd0932..3a7c9ffd5ab9 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -83,7 +83,14 @@ struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); +struct gpio_descs *__must_check devm_gpiod_get_array(struct device *dev, + const char *con_id, + enum gpiod_flags flags); +struct gpio_descs *__must_check +devm_gpiod_get_array_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags); void devm_gpiod_put(struct device *dev, struct gpio_desc *desc); +void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs); int gpiod_get_direction(struct gpio_desc *desc); int gpiod_direction_input(struct gpio_desc *desc); @@ -228,6 +235,20 @@ __devm_gpiod_get_index_optional(struct device *dev, const char *con_id, return ERR_PTR(-ENOSYS); } +static inline struct gpio_descs *__must_check +devm_gpiod_get_array(struct device *dev, const char *con_id, + enum gpiod_flags flags) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct gpio_descs *__must_check +devm_gpiod_get_array_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags) +{ + return ERR_PTR(-ENOSYS); +} + static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) { might_sleep(); @@ -236,6 +257,15 @@ static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) WARN_ON(1); } +static inline void devm_gpiod_put_array(struct device *dev, + struct gpio_descs *descs) +{ + might_sleep(); + + /* GPIO can never have been requested */ + WARN_ON(1); +} + static inline int gpiod_get_direction(const struct gpio_desc *desc) { -- cgit v1.2.3 From c32ec2a11321978c34296d9a6bd5b0c31a2eb182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 4 Mar 2015 12:14:41 +0100 Subject: bcma: make bcma_host_pci_(up|down) calls safe for every config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were providing declarations but actual code was compiled only with CONFIG_BCMA_HOST_PCI set. This could result in: ERROR: "bcma_host_pci_down" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_down" [drivers/net/wireless/b43/b43.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/b43/b43.ko] undefined! Reported-by: Arnd Bergmann Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 037620b3f113..44057b45ed32 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,8 +434,17 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +#ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +#else +static inline void bcma_host_pci_up(struct bcma_bus *bus) +{ +} +static inline void bcma_host_pci_down(struct bcma_bus *bus) +{ +} +#endif extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); -- cgit v1.2.3 From 0a4e699a41f767dff76ca7dc1019b9ca6de3eb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 4 Mar 2015 14:24:52 +0100 Subject: bcma: move internal function declarations to private header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not exported nor used anywhere, so there is no reason to put them in public headers. Also drop unused bcma_chipco_(suspend|resume). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 11 ----------- include/linux/bcma/bcma_driver_gmac_cmn.h | 6 ------ include/linux/bcma/bcma_driver_mips.h | 15 --------------- include/linux/bcma/bcma_driver_pci.h | 2 -- include/linux/bcma/bcma_driver_pcie2.h | 2 -- 5 files changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index db6fa217f98b..6cceedf65ca2 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -663,14 +663,6 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) -extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); -extern void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); - -extern void bcma_chipco_suspend(struct bcma_drv_cc *cc); -extern void bcma_chipco_resume(struct bcma_drv_cc *cc); - -void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); - extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); @@ -690,9 +682,6 @@ u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value); /* PMU support */ -extern void bcma_pmu_init(struct bcma_drv_cc *cc); -extern void bcma_pmu_early_init(struct bcma_drv_cc *cc); - extern void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value); extern void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index 4dd1f33e36a2..4354d4ea6713 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -91,10 +91,4 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write16(gc, offset, val) bcma_write16((gc)->core, offset, val) #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) -#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); -#else -static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } -#endif - #endif /* LINUX_BCMA_DRIVER_GMAC_CMN_H_ */ diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 0b3b32aeeb8a..8eea7f9e33b4 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -39,21 +39,6 @@ struct bcma_drv_mips { u8 early_setup_done:1; }; -#ifdef CONFIG_BCMA_DRIVER_MIPS -extern void bcma_core_mips_init(struct bcma_drv_mips *mcore); -extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); - -extern unsigned int bcma_core_mips_irq(struct bcma_device *dev); -#else -static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { } -static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } - -static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev) -{ - return 0; -} -#endif - extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore); #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */ diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 6b8bca67851f..8e90004fdfd7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); -extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index d8c43294c527..31e6d17ab798 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -155,6 +155,4 @@ struct bcma_drv_pcie2 { #define pcie2_set32(pcie2, offset, set) bcma_set32((pcie2)->core, offset, set) #define pcie2_mask32(pcie2, offset, mask) bcma_mask32((pcie2)->core, offset, mask) -void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); - #endif /* LINUX_BCMA_DRIVER_PCIE2_H_ */ -- cgit v1.2.3 From 68c062eaa87b7b85b65f20f25c54524437715a95 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Mar 2015 11:42:43 +0100 Subject: dmaengine: Remove net_dma leftovers Commit 7bce d397 510a ("net_dma: simple removal") removed the net_dma support entirely but left some functions and prototypes in the dmaengine header. Remove them. Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index db0104b0da4d..f5cc5d4f1ad5 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1108,27 +1108,4 @@ static inline struct dma_chan return __dma_request_channel(mask, fn, fn_param); } - -/* --- Helper iov-locking functions --- */ - -struct dma_page_list { - char __user *base_address; - int nr_pages; - struct page **pages; -}; - -struct dma_pinned_list { - int nr_iovecs; - struct dma_page_list page_list[0]; -}; - -struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len); -void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list); - -dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, - struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len); -dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, - struct dma_pinned_list *pinned_list, struct page *page, - unsigned int offset, size_t len); - #endif /* DMAENGINE_H */ -- cgit v1.2.3 From bfde98bd762346639f0a5a557e02c4828dd6273b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Mar 2015 11:48:50 +0100 Subject: dmaengine: Remove net_dma_find_channel Since commit 7bced397510a ("net_dma: simple removal") removed the net_dma support entirely, net_dma_find_channel has no users left. Remove the function entirely. Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index f5cc5d4f1ad5..2bff9abc162a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1090,7 +1090,6 @@ void dma_async_device_unregister(struct dma_device *device); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); struct dma_chan *dma_get_slave_channel(struct dma_chan *chan); struct dma_chan *dma_get_any_slave_channel(struct dma_device *device); -struct dma_chan *net_dma_find_channel(void); #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) #define dma_request_slave_channel_compat(mask, x, y, dev, name) \ __dma_request_slave_channel_compat(&(mask), x, y, dev, name) -- cgit v1.2.3 From f33c9d655893d8632460696bbbdee737cb315711 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 24 Feb 2015 02:06:43 +0000 Subject: mmc: tmio: mmc: tmio: tmio_mmc_data has .chan_priv_?x dma_request_slave_channel_compat() in tmio_mmc_dma needs .chan_priv_tx/.chan_priv_rx. But these are copied from sh_mobile_sdhi only, and sh_mobile_sdhi_info is now almost same as tmio_mmc_data except .chan_priv_?x. sh_mobile_sdhi_info can be replaced to tmio_mmc_data, but it is used from ${LINUX}/arch/arm/mach-shmobile, ${LINUX}/arch/sh. So, this patch adds .chan_priv_?x into tmio_mmc_data as 1st step, and sh_mobile_sdhi driver has dummy operation for now. It will be replaced/removed together with platform data replace. Signed-off-by: Kuninori Morimoto Acked-by: Arnd Bergmann Acked-by: Ulf Hansson Acked-by: Lee Jones Signed-off-by: Vinod Koul --- include/linux/mfd/tmio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 605812820e48..24b86d538e88 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -111,6 +111,8 @@ struct dma_chan; * data for the MMC controller */ struct tmio_mmc_data { + void *chan_priv_tx; + void *chan_priv_rx; unsigned int hclk; unsigned long capabilities; unsigned long capabilities2; -- cgit v1.2.3 From 84f11d5b1f2abc0e22895b7e12e037f0ec03caeb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 24 Feb 2015 02:07:07 +0000 Subject: mmc: sh_mobile_sdhi: remove sh_mobile_sdhi_info Current sh_mobile_sdhi's platform data is set via sh_mobile_sdhi_info and it is just copied to tmio_mmc_data. Now, tmio mmc platform data is specified via tmio_mmc_data. This patch replace sh_mobile_sdhi_info to tmio_mmc_data struct sh_mobile_sdhi_info { -> struct tmio_mmc_data { int dma_slave_tx; -> void *chan_priv_tx; int dma_slave_rx; -> void *chan_priv_rx; unsigned long tmio_flags; -> unsigned long flags; unsigned long tmio_caps; -> unsigned long capabilities; unsigned long tmio_caps2; -> unsigned long capabilities2; u32 tmio_ocr_mask; -> u32 ocr_mask; unsigned int cd_gpio; -> unsigned int cd_gpio; }; unsigned int hclk; void (*set_pwr)(...); void (*set_clk_div)(...); }; Signed-off-by: Kuninori Morimoto Acked-by: Arnd Bergmann Acked-by: Ulf Hansson Signed-off-by: Vinod Koul --- include/linux/mmc/sh_mobile_sdhi.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h index da77e5e2041d..95d6f0314a7d 100644 --- a/include/linux/mmc/sh_mobile_sdhi.h +++ b/include/linux/mmc/sh_mobile_sdhi.h @@ -7,14 +7,4 @@ #define SH_MOBILE_SDHI_IRQ_SDCARD "sdcard" #define SH_MOBILE_SDHI_IRQ_SDIO "sdio" -struct sh_mobile_sdhi_info { - int dma_slave_tx; - int dma_slave_rx; - unsigned long tmio_flags; - unsigned long tmio_caps; - unsigned long tmio_caps2; - u32 tmio_ocr_mask; /* available MMC voltages */ - unsigned int cd_gpio; -}; - #endif /* LINUX_MMC_SH_MOBILE_SDHI_H */ -- cgit v1.2.3 From 046db763aaaeb987ea01ea8c7e6d618e0ad1e6b8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 5 Mar 2015 15:39:20 +0000 Subject: regulator: core: Add devres versions of notifier registration Add devm_regulator_register_notifier, this adds the resource against the device for the consumer supply we are registering the notifier for. There seem to be few use-cases where this wouldn't be the users intention and this ensures the notifiers will always be removed at the correct time. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d17e1ff7ad01..bd631ee5f1da 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -252,8 +252,12 @@ int regulator_list_hardware_vsel(struct regulator *regulator, /* regulator notifier block */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb); +int devm_regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb); int regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb); +void devm_regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb); /* driver data - core doesn't touch */ void *regulator_get_drvdata(struct regulator *regulator); @@ -515,12 +519,24 @@ static inline int regulator_register_notifier(struct regulator *regulator, return 0; } +static inline int devm_regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + static inline int regulator_unregister_notifier(struct regulator *regulator, struct notifier_block *nb) { return 0; } +static inline int devm_regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + static inline void *regulator_get_drvdata(struct regulator *regulator) { return NULL; -- cgit v1.2.3 From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 4 Mar 2015 12:54:21 +0200 Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi This extends the design in commit 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") with optional set of rules that are needed to meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The previously added BR_PROXYARP behavior is left as-is and a new BR_PROXYARP_WIFI alternative is added so that this behavior can be configured from user space when required. In addition, this enables proxyarp functionality for unicast ARP requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible to use unicast as well as broadcast for these frames. The key differences in functionality: BR_PROXYARP: - uses the flag on the bridge port on which the request frame was received to determine whether to reply - block bridge port flooding completely on ports that enable proxy ARP BR_PROXYARP_WIFI: - uses the flag on the bridge port to which the target device of the request belongs - block bridge port flooding selectively based on whether the proxyarp functionality replied Signed-off-by: Jouni Malinen Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a57bca2ea97e..dad8b00beed2 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -44,6 +44,7 @@ struct br_ip_list { #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) +#define BR_PROXYARP_WIFI BIT(10) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -- cgit v1.2.3 From d8bf368d0631d4bc2612d8bf2e4e8e74e620d0cc Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Thu, 5 Mar 2015 15:23:08 +0100 Subject: genirq: Remove the deprecated 'IRQF_DISABLED' request_irq() flag entirely The IRQF_DISABLED flag is a NOOP and has been scheduled for removal since Linux v2.6.36 by commit 6932bf37bed4 ("genirq: Remove IRQF_DISABLED from core code"). According to commit e58aa3d2d0cc ("genirq: Run irq handlers with interrupts disabled"), running IRQ handlers with interrupts enabled can cause stack overflows when the interrupt line of the issuing device is still active. This patch ends the grace period for IRQF_DISABLED (i.e., SA_INTERRUPT in older versions of Linux) and removes the definition and all remaining usages of this flag. There's still a few non-functional references left in the kernel source: - The bigger hunk in Documentation/scsi/ncr53c8xx.txt is removed entirely as IRQF_DISABLED is gone now; the usage in older kernel versions (including the old SA_INTERRUPT flag) should be discouraged. The trouble of using IRQF_SHARED is a general problem and not specific to any driver. - I left the reference in Documentation/PCI/MSI-HOWTO.txt untouched since it has already been removed in linux-next. - All remaining references are changelogs that I suggest to keep. Signed-off-by: Valentin Rothberg Cc: Afzal Mohammed Cc: Arnd Bergmann Cc: Brian Norris Cc: Christoph Hellwig Cc: Dan Carpenter Cc: David Woodhouse Cc: Ewan Milne Cc: Eyal Perry Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Hannes Reinecke Cc: Hongliang Tao Cc: Huacai Chen Cc: Jiri Kosina Cc: Jonathan Corbet Cc: Keerthy Cc: Laurent Pinchart Cc: Linus Torvalds Cc: Nishanth Menon Cc: Paul Bolle Cc: Peter Ujfalusi Cc: Peter Zijlstra Cc: Quentin Lambert Cc: Rajendra Nayak Cc: Ralf Baechle Cc: Santosh Shilimkar Cc: Sricharan R Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Zhou Wang Cc: iss_storagedev@hp.com Cc: linux-mips@linux-mips.org Cc: linux-mtd@lists.infradead.org Link: http://lkml.kernel.org/r/1425565425-12604-1-git-send-email-valentinrothberg@gmail.com Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3bb01b9a379c..2cee1761c77d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -39,8 +39,6 @@ * These flags used only by the kernel as part of the * irq handling routines. * - * IRQF_DISABLED - keep irqs disabled when calling the action handler. - * DEPRECATED. This flag is a NOOP and scheduled to be removed * IRQF_SHARED - allow sharing the irq among several devices * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur * IRQF_TIMER - Flag to mark this interrupt as timer interrupt @@ -58,7 +56,6 @@ * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. */ -#define IRQF_DISABLED 0x00000020 #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 #define __IRQF_TIMER 0x00000200 -- cgit v1.2.3 From 4586f1bb911ce219a4bc1c2a9d6eee2e058b2b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:14 -0800 Subject: netdevice: add IPv4 fib add/del ops Add two new ndo ops for IPv4 fib offload support, add and del. Add uses modifiy semantics if fib entry already offloaded. Drivers implementing the new ndo ops will return err<0 if programming device fails, for example if device's tables are full. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 625c8d71511b..45413784a3b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,6 +768,8 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +struct fib_info; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); * Called to notify switch device port of bridge port STP * state change. + * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to add/modify IPv4 route to switch device. + * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1192,6 +1202,18 @@ struct net_device_ops { struct netdev_phys_item_id *psid); int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); + int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); #endif }; -- cgit v1.2.3 From 2e67690137f3a7bac660edd548f8846709c55381 Mon Sep 17 00:00:00 2001 From: Robert ABEL Date: Fri, 27 Feb 2015 16:56:53 +0100 Subject: ARM OMAP2+ GPMC: calculate GPMCFCLKDIVIDER based on WAITMONITORINGTIME The WAITMONITORINGTIME is expressed as a number of GPMC_CLK clock cycles, even though the access is defined as asynchronous, and no GPMC_CLK clock is provided to the external device. Still, GPMCFCLKDIVIDER is used as a divider for the GPMC clock, so it must be programmed to define the correct WAITMONITORINGTIME delay. Calculate GPMCFCLKDIVIDER independent of gpmc,sync-clk-ps in DT for pure asynchronous accesses, i.e. both read and write asynchronous. Signed-off-by: Robert ABEL Acked-by: Tony Lindgren Signed-off-by: Roger Quadros --- include/linux/omap-gpmc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index c2080eebbb47..7dee00143afd 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -163,7 +163,8 @@ extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); extern void gpmc_cs_write_reg(int cs, int idx, u32 val); extern int gpmc_calc_divider(unsigned int sync_clk); -extern int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t); +extern int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t, + const struct gpmc_settings *s); extern int gpmc_cs_program_settings(int cs, struct gpmc_settings *p); extern int gpmc_cs_request(int cs, unsigned long size, unsigned long *base); extern void gpmc_cs_free(int cs); -- cgit v1.2.3 From b716c4ffc6a2b0bfbcf9619880f335be11b65708 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 27 Feb 2015 17:34:15 +0200 Subject: spi: introduce master->handle_err() callback This callback would be useful to handle an error that occurs in the generic implementation of transfer_one_message(). The good candidate for this is to drain FIFO and / or to terminate DMA transfers when timeout happened. Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ed9489d893a4..4eaac3a5227b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -294,6 +294,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * transfer_one_message are mutually exclusive; when both * are set, the generic subsystem does not call your * transfer_one callback. + * @handle_err: the subsystem calls the driver to handle and error that occurs + * in the generic implementation of transfer_one_message(). * @unprepare_message: undo any work done by prepare_message(). * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that @@ -448,6 +450,8 @@ struct spi_master { void (*set_cs)(struct spi_device *spi, bool enable); int (*transfer_one)(struct spi_master *master, struct spi_device *spi, struct spi_transfer *transfer); + void (*handle_err)(struct spi_master *master, + struct spi_message *message); /* gpio chip select */ int *cs_gpios; -- cgit v1.2.3 From df3a950e4e7386027fc174566aa5c24781297be8 Mon Sep 17 00:00:00 2001 From: Zubair Lutfullah Kakakhel Date: Fri, 27 Feb 2015 17:04:04 +0000 Subject: regulator: act8865: Add act8600 support This patch adds act8600 support to the act8865 driver. VBUS and USB charger supported by this chip can be added later Tested on MIPS Creator CI20 Signed-off-by: Zubair Lutfullah Kakakhel Signed-off-by: Mark Brown --- include/linux/regulator/act8865.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h index b6c4909b33af..15fa8f2d35c9 100644 --- a/include/linux/regulator/act8865.h +++ b/include/linux/regulator/act8865.h @@ -18,6 +18,19 @@ #include +enum { + ACT8600_ID_DCDC1, + ACT8600_ID_DCDC2, + ACT8600_ID_DCDC3, + ACT8600_ID_SUDCDC4, + ACT8600_ID_LDO5, + ACT8600_ID_LDO6, + ACT8600_ID_LDO7, + ACT8600_ID_LDO8, + ACT8600_ID_LDO9, + ACT8600_ID_LDO10, +}; + enum { ACT8865_ID_DCDC1, ACT8865_ID_DCDC2, @@ -46,6 +59,7 @@ enum { }; enum { + ACT8600, ACT8865, ACT8846, }; -- cgit v1.2.3 From 637473cf006fe4cd85aed0fb69b6c917d868ada2 Mon Sep 17 00:00:00 2001 From: Sharon Dvir Date: Thu, 22 Jan 2015 12:15:25 +0200 Subject: mod_devicetable: fix comment for match_flags Signed-off-by: Sharon Dvir Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 745def862580..470a240f66a1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -53,9 +53,9 @@ struct ieee1394_device_id { /** * struct usb_device_id - identifies USB devices for probing and hotplugging - * @match_flags: Bit mask controlling of the other fields are used to match - * against new devices. Any field except for driver_info may be used, - * although some only make sense in conjunction with other fields. + * @match_flags: Bit mask controlling which of the other fields are used to + * match against new devices. Any field except for driver_info may be + * used, although some only make sense in conjunction with other fields. * This is usually set by a USB_DEVICE_*() macro, which sets all * other fields in this structure except for driver_info. * @idVendor: USB vendor ID for a device; numbers are assigned -- cgit v1.2.3 From c5b66e47251d797e38f3ee8ec8d613780506c245 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 23 Jan 2015 13:36:55 +0800 Subject: smpboot.h: Remove unused function prototype Function smpboot_thread_schedule() is neither used nor defined, so kill it. Signed-off-by: Jiang Liu Signed-off-by: Jiri Kosina --- include/linux/smpboot.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index 13e929679550..d600afb21926 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -47,6 +47,5 @@ struct smp_hotplug_thread { int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -int smpboot_thread_schedule(void); #endif -- cgit v1.2.3 From f38bacb3ebdd30ae4695735a82497659ba05d9fa Mon Sep 17 00:00:00 2001 From: Himanshu Maithani Date: Fri, 30 Jan 2015 01:31:14 +0530 Subject: stacktrace.h: remove duplicate declaration task_struct There is duplicate declaration for struct task_struct. One can be removed safely. Signed-off-by: Himanshu Maithani Signed-off-by: Jiri Kosina --- include/linux/stacktrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 115b570e3bff..1fea0380e97f 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -5,8 +5,6 @@ struct task_struct; struct pt_regs; #ifdef CONFIG_STACKTRACE -struct task_struct; - struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; -- cgit v1.2.3 From c74a804f115bdedcac72ea52ca33f46cfae3b74f Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Thu, 26 Feb 2015 18:53:10 +0300 Subject: staging: vme: mmap() support for vme_user We also make sure that user won't be able to reconfigure the window while it is mmap'ed. Signed-off-by: Dmitry Kalinkin Cc: Martyn Welch Cc: Igor Alekseev Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index 8cd6f19ca518..79242e9c06b8 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -137,6 +137,7 @@ ssize_t vme_master_read(struct vme_resource *, void *, size_t, loff_t); ssize_t vme_master_write(struct vme_resource *, void *, size_t, loff_t); unsigned int vme_master_rmw(struct vme_resource *, unsigned int, unsigned int, unsigned int, loff_t); +int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma); void vme_master_free(struct vme_resource *); struct vme_resource *vme_dma_request(struct vme_dev *, u32); -- cgit v1.2.3 From 2b49e0c56741fca538176f66ed3c8d16ce4fccd8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2015 16:24:42 +0200 Subject: dmaengine: append hsu DMA driver The HSU DMA is developed to support High Speed UART controllers found in particular on Intel MID platforms such as Intel Medfield. The existing implementation is tighten to the drivers/tty/serial/mfd.c driver and has a lot of disadvantages. Besides that we would like to get rid of the old HS UART driver in regarding to extending the 8250 which supports generic DMAEngine API. That's why the current driver has been developed. Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/hsu.h | 48 +++++++++++++++++++++++++++++++++++ include/linux/platform_data/dma-hsu.h | 25 ++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 include/linux/dma/hsu.h create mode 100644 include/linux/platform_data/dma-hsu.h (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h new file mode 100644 index 000000000000..234393a6997b --- /dev/null +++ b/include/linux/dma/hsu.h @@ -0,0 +1,48 @@ +/* + * Driver for the High Speed UART DMA + * + * Copyright (C) 2015 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _DMA_HSU_H +#define _DMA_HSU_H + +#include +#include + +#include + +struct hsu_dma; + +/** + * struct hsu_dma_chip - representation of HSU DMA hardware + * @dev: struct device of the DMA controller + * @irq: irq line + * @regs: memory mapped I/O space + * @length: I/O space length + * @offset: offset of the I/O space where registers are located + * @hsu: struct hsu_dma that is filed by ->probe() + * @pdata: platform data for the DMA controller if provided + */ +struct hsu_dma_chip { + struct device *dev; + int irq; + void __iomem *regs; + unsigned int length; + unsigned int offset; + struct hsu_dma *hsu; + struct hsu_dma_platform_data *pdata; +}; + +/* Export to the internal users */ +irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr); + +/* Export to the platform drivers */ +int hsu_dma_probe(struct hsu_dma_chip *chip); +int hsu_dma_remove(struct hsu_dma_chip *chip); + +#endif /* _DMA_HSU_H */ diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h new file mode 100644 index 000000000000..8a1f6a4920b2 --- /dev/null +++ b/include/linux/platform_data/dma-hsu.h @@ -0,0 +1,25 @@ +/* + * Driver for the High Speed UART DMA + * + * Copyright (C) 2015 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _PLATFORM_DATA_DMA_HSU_H +#define _PLATFORM_DATA_DMA_HSU_H + +#include + +struct hsu_dma_slave { + struct device *dma_dev; + int chan_id; +}; + +struct hsu_dma_platform_data { + unsigned short nr_channels; +}; + +#endif /* _PLATFORM_DATA_DMA_HSU_H */ -- cgit v1.2.3 From 1bd187de536494a27925902b9653e9ef0ace4774 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 23 Feb 2015 16:24:44 +0200 Subject: x86, intel-mid: remove Intel MID specific serial support Since we have a native 8250 driver carrying the Intel MID serial devices the specific support is not needed anymore. This patch removes it for Intel MID. Note that the console device name is changed from ttyMFDx to ttySx. Signed-off-by: Andy Shevchenko Acked-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_mfd.h | 47 ---------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 include/linux/serial_mfd.h (limited to 'include/linux') diff --git a/include/linux/serial_mfd.h b/include/linux/serial_mfd.h deleted file mode 100644 index 2b071e0b034d..000000000000 --- a/include/linux/serial_mfd.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _SERIAL_MFD_H_ -#define _SERIAL_MFD_H_ - -/* HW register offset definition */ -#define UART_FOR 0x08 -#define UART_PS 0x0C -#define UART_MUL 0x0D -#define UART_DIV 0x0E - -#define HSU_GBL_IEN 0x0 -#define HSU_GBL_IST 0x4 - -#define HSU_GBL_INT_BIT_PORT0 0x0 -#define HSU_GBL_INT_BIT_PORT1 0x1 -#define HSU_GBL_INT_BIT_PORT2 0x2 -#define HSU_GBL_INT_BIT_IRI 0x3 -#define HSU_GBL_INT_BIT_HDLC 0x4 -#define HSU_GBL_INT_BIT_DMA 0x5 - -#define HSU_GBL_ISR 0x8 -#define HSU_GBL_DMASR 0x400 -#define HSU_GBL_DMAISR 0x404 - -#define HSU_PORT_REG_OFFSET 0x80 -#define HSU_PORT0_REG_OFFSET 0x80 -#define HSU_PORT1_REG_OFFSET 0x100 -#define HSU_PORT2_REG_OFFSET 0x180 -#define HSU_PORT_REG_LENGTH 0x80 - -#define HSU_DMA_CHANS_REG_OFFSET 0x500 -#define HSU_DMA_CHANS_REG_LENGTH 0x40 - -#define HSU_CH_SR 0x0 /* channel status reg */ -#define HSU_CH_CR 0x4 /* control reg */ -#define HSU_CH_DCR 0x8 /* descriptor control reg */ -#define HSU_CH_BSR 0x10 /* max fifo buffer size reg */ -#define HSU_CH_MOTSR 0x14 /* minimum ocp transfer size */ -#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */ -#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */ -#define HSU_CH_D1SAR 0x28 -#define HSU_CH_D1TSR 0x2C -#define HSU_CH_D2SAR 0x30 -#define HSU_CH_D2TSR 0x34 -#define HSU_CH_D3SAR 0x38 -#define HSU_CH_D3TSR 0x3C - -#endif -- cgit v1.2.3 From afe9cbb1a6adf6da5fa6d4747d102b95b4bb52c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 24 Feb 2015 11:17:10 +0100 Subject: serial: imx: drop support for IRDA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support for IRDA was added in 2009 in commit v2.6.31-rc1~399^2~2. There are no in-tree users. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/serial-imx.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/serial-imx.h b/include/linux/platform_data/serial-imx.h index 3cc2e3c40914..a938eba2f18e 100644 --- a/include/linux/platform_data/serial-imx.h +++ b/include/linux/platform_data/serial-imx.h @@ -20,14 +20,9 @@ #define ASMARM_ARCH_UART_H #define IMXUART_HAVE_RTSCTS (1<<0) -#define IMXUART_IRDA (1<<1) struct imxuart_platform_data { unsigned int flags; - void (*irda_enable)(int enable); - unsigned int irda_inv_rx:1; - unsigned int irda_inv_tx:1; - unsigned short transceiver_delay; }; #endif -- cgit v1.2.3 From d237baa1cbb3a2335357484c1d63a810a01947e2 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Thu, 5 Mar 2015 20:16:12 +0200 Subject: net/mlx4_core: Add basic elements for QCN Add device capability, firmware command opcode and etc prior elements needed for QCN suppprt. Disable SRIOV VF view/access for QCN is disabled. While here, remove a redundant offset definition into the QUERY_DEV_CAP mailbox. Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 13 +++++++++++++ include/linux/mlx4/device.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7b6d4e9ff603..7299e9548906 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -163,6 +163,9 @@ enum { MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64, + + /* Update and read QCN parameters */ + MLX4_CMD_CONGESTION_CTRL_OPCODE = 0x68, }; enum { @@ -233,6 +236,16 @@ struct mlx4_config_dev_params { u8 rx_csum_flags_port_2; }; +enum mlx4_en_congestion_control_algorithm { + MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT = 0, +}; + +enum mlx4_en_congestion_control_opmod { + MLX4_CONGESTION_CONTROL_GET_PARAMS, + MLX4_CONGESTION_CONTROL_GET_STATISTICS, + MLX4_CONGESTION_CONTROL_SET_PARAMS = 4, +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4ebff7e9d02..1cc54822b931 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -203,7 +203,8 @@ enum { MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, - MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, + MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, }; enum { -- cgit v1.2.3 From 73abaf87f01be6fa6da3c0aa9c138a1b6b281068 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 1 Mar 2015 11:05:46 -0500 Subject: serial: earlycon: Refactor parse_options into serial core Prepare to support console-defined matching; refactor the command line parameter string processing from parse_options() into a new core function, uart_parse_earlycon(), which decodes command line parameters of the form: earlycon=,io|mmio|mmio32,, console=,io|mmio|mmio32,, earlycon=,0x, console=,0x, Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index baf3e1d08416..cc5c506f07dd 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -354,6 +354,8 @@ early_param("earlycon", name ## _setup_earlycon); struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); +int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr, + char **options); void uart_parse_options(char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, -- cgit v1.2.3 From ea022bbb0090975a21c581d8405fbe90043a6eda Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 8 Mar 2015 14:56:38 +0100 Subject: spi: Remove support for legacy PM All SPI drivers have been converted from legacy suspend/resume callbacks to dev_pm_ops. So we can finally remove support for legacy PM from the SPI core. Since there aren't any special bus specific things to do during suspend/resume and since the PM core will automatically fallback directly to using the device's PM ops if no bus PM ops are specified there is no need to have any special SPI bus PM ops. Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ed9489d893a4..9f6b481e8672 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -162,8 +162,6 @@ struct spi_transfer; * @remove: Unbinds this driver from the spi device * @shutdown: Standard shutdown callback used during system state * transitions such as powerdown/halt and kexec - * @suspend: Standard suspend callback used during system state transitions - * @resume: Standard resume callback used during system state transitions * @driver: SPI device drivers should initialize the name and owner * field of this structure. * @@ -184,8 +182,6 @@ struct spi_driver { int (*probe)(struct spi_device *spi); int (*remove)(struct spi_device *spi); void (*shutdown)(struct spi_device *spi); - int (*suspend)(struct spi_device *spi, pm_message_t mesg); - int (*resume)(struct spi_device *spi); struct device_driver driver; }; -- cgit v1.2.3 From 04abab698285297115e5096b3100df1064045529 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Wed, 11 Feb 2015 13:53:15 +0100 Subject: include/dma-mapping: Clarify output of dma_map_sg Although dma_map_sg returns 0 on error and it cannot return a value < 0, the function returns a signed integer. Most of the time, this function is used with a scatterlist structure. This structure uses an unsigned integer for the number of memory. A dma developer that has not read in detail DMA-API.txt, can wrongly return a value < 0 on error. Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Marek Szyprowski --- include/linux/dma-mapping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c3007cb4bfa6..ac07ff090919 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -34,6 +34,10 @@ struct dma_map_ops { void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); + /* + * map_sg returns 0 on error and a value > 0 on success. + * It should never return a value < 0. + */ int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); -- cgit v1.2.3 From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:33 +0100 Subject: bridge: move mac header copying into br_netfilter The mac header only has to be copied back into the skb for fragments generated by ip_fragment(), which only happens for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index c755e4971fa3..332ef8ab37e9 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,36 +44,6 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb) skb->protocol = htons(ETH_P_PPP_SES); } -/* Fill in the header for fragmented IP packets handled by - * the IPv4 connection tracking code. - * - * Only used in br_forward.c - */ -static inline int nf_bridge_copy_header(struct sk_buff *skb) -{ - int err; - unsigned int header_size; - - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return err; - - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return 0; -} - -static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) -{ - if (skb->nf_bridge && - skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)) - return nf_bridge_copy_header(skb); - return 0; -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -119,7 +89,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:34 +0100 Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used no need to keep it in a header file. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 332ef8ab37e9..dd580a9a1add 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -36,14 +36,6 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } -static inline void nf_bridge_update_protocol(struct sk_buff *skb) -{ - if (skb->nf_bridge->mask & BRNF_8021Q) - skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) - skb->protocol = htons(ETH_P_PPP_SES); -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) -- cgit v1.2.3 From 3494fc30846dceb808de4cc02930ef347fabd21a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Mar 2015 09:22:28 -0400 Subject: workqueue: dump workqueues on sysrq-t Workqueues are used extensively throughout the kernel but sometimes it's difficult to debug stalls involving work items because visibility into its inner workings is fairly limited. Although sysrq-t task dump annotates each active worker task with the information on the work item being executed, it is challenging to find out which work items are pending or delayed on which queues and how pools are being managed. This patch implements show_workqueue_state() which dumps all busy workqueues and pools and is called from the sysrq-t handler. At the end of sysrq-t dump, something like the following is printed. Showing busy workqueues and worker pools: ... workqueue filler_wq: flags=0x0 pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=2/256 in-flight: 491:filler_workfn, 507:filler_workfn pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 in-flight: 501:filler_workfn pending: filler_workfn ... workqueue test_wq: flags=0x8 pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=1/1 in-flight: 510(RESCUER):test_workfn BAR(69) BAR(500) delayed: test_workfn1 BAR(492), test_workfn2 ... pool 0: cpus=0 node=0 flags=0x0 nice=0 workers=2 manager: 137 pool 2: cpus=1 node=0 flags=0x0 nice=0 workers=3 manager: 469 pool 3: cpus=1 node=0 flags=0x0 nice=-20 workers=2 idle: 16 pool 8: cpus=0-3 flags=0x4 nice=0 workers=2 manager: 62 The above shows that test_wq is executing test_workfn() on pid 510 which is the rescuer and also that there are two tasks 69 and 500 waiting for the work item to finish in flush_work(). As test_wq has max_active of 1, there are two work items for test_workfn1() and test_workfn2() which are delayed till the current work item is finished. In addition, pid 492 is flushing test_workfn1(). The work item for test_workfn() is being executed on pwq of pool 2 which is the normal priority per-cpu pool for CPU 1. The pool has three workers, two of which are executing filler_workfn() for filler_wq and the last one is assuming the manager role trying to create more workers. This extra workqueue state dump will hopefully help chasing down hangs involving workqueues. v3: cpulist_pr_cont() replaced with "%*pbl" printf formatting. v2: As suggested by Andrew, minor formatting change in pr_cont_work(), printk()'s replaced with pr_info()'s, and cpumask printing now uses cpulist_pr_cont(). Signed-off-by: Tejun Heo Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Andrew Morton CC: Ingo Molnar --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f597846ff605..deee212af8e0 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -454,6 +454,7 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); +extern void show_workqueue_state(void); /** * queue_work - queue work on a workqueue -- cgit v1.2.3 From c467ea763fd5d8795b7d1b5a78eb94b6ad8f66ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Mar 2015 18:06:33 +0100 Subject: context_tracking: Rename context symbols to prepare for transition state Current context tracking symbols are designed to express living state. As such they are prefixed with "IN_": IN_USER, IN_KERNEL. Now we are going to use these symbols to also express state transitions such as context_tracking_enter(IN_USER) or context_tracking_exit(IN_USER). But while the "IN_" prefix works well to express entering a context, it's confusing to depict a context exit: context_tracking_exit(IN_USER) could mean two things: 1) We are exiting the current context to enter user context. 2) We are exiting the user context We want 2) but the reviewer may be confused and understand 1) So lets disambiguate these symbols and rename them to CONTEXT_USER and CONTEXT_KERNEL. Acked-by: Rik van Riel Cc: Paul E. McKenney Cc: Andy Lutomirski Cc: Will deacon Cc: Marcelo Tosatti Cc: Christian Borntraeger Cc: Luiz Capitulino Cc: Paolo Bonzini Signed-off-by: Frederic Weisbecker --- include/linux/context_tracking.h | 2 +- include/linux/context_tracking_state.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 37b81bd51ec0..427b056dfd3d 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -43,7 +43,7 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { if (context_tracking_is_enabled()) { - if (prev_ctx == IN_USER) + if (prev_ctx == CONTEXT_USER) context_tracking_user_enter(); } } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 97a81225d037..8076f875c324 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -13,8 +13,8 @@ struct context_tracking { */ bool active; enum ctx_state { - IN_KERNEL = 0, - IN_USER, + CONTEXT_KERNEL = 0, + CONTEXT_USER, } state; }; @@ -34,7 +34,7 @@ static inline bool context_tracking_cpu_is_enabled(void) static inline bool context_tracking_in_user(void) { - return __this_cpu_read(context_tracking.state) == IN_USER; + return __this_cpu_read(context_tracking.state) == CONTEXT_USER; } #else static inline bool context_tracking_in_user(void) { return false; } -- cgit v1.2.3 From 3aab4f50bff89bdea5066a05d4f3c5fa25bc37c7 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 10 Feb 2015 15:27:50 -0500 Subject: context_tracking: Generalize context tracking APIs to support user and guest Generalize the context tracking APIs to support various nature of contexts. This is performed by splitting out the mechanism from context_tracking_user_enter and context_tracking_user_exit into context_tracking_enter and context_tracking_exit. The nature of the context we track is now detailed in a ctx_state parameter pushed to these APIs, allowing the same functions to not just track kernel <> user space switching, but also kernel <> guest transitions. But leave the old functions in order to avoid breaking ARM, which calls these functions from assembler code, and cannot easily use C enum parameters. Reviewed-by: Paul E. McKenney Signed-off-by: Rik van Riel Cc: Paul E. McKenney Cc: Andy Lutomirski Cc: Will deacon Cc: Marcelo Tosatti Cc: Christian Borntraeger Cc: Luiz Capitulino Cc: Paolo Bonzini Signed-off-by: Frederic Weisbecker --- include/linux/context_tracking.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 427b056dfd3d..7f1810a3b5a4 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,8 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +extern void context_tracking_enter(enum ctx_state state); +extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); extern void context_tracking_user_exit(void); extern void __context_tracking_task_switch(struct task_struct *prev, @@ -35,7 +37,8 @@ static inline enum ctx_state exception_enter(void) return 0; prev_ctx = this_cpu_read(context_tracking.state); - context_tracking_user_exit(); + if (prev_ctx != CONTEXT_KERNEL) + context_tracking_exit(prev_ctx); return prev_ctx; } @@ -43,8 +46,8 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { if (context_tracking_is_enabled()) { - if (prev_ctx == CONTEXT_USER) - context_tracking_user_enter(); + if (prev_ctx != CONTEXT_KERNEL) + context_tracking_enter(prev_ctx); } } -- cgit v1.2.3 From a3a9c7dff25791f3a1041aa96e770af662d99a35 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 10 Feb 2015 15:27:51 -0500 Subject: context_tracking: Add stub context_tracking_is_enabled With code elsewhere doing something conditional on whether or not context tracking is enabled, we want a stub function that tells us context tracking is not enabled, when CONFIG_CONTEXT_TRACKING is not set. Reviewed-by: Paul E. McKenney Signed-off-by: Rik van Riel Cc: Paul E. McKenney Cc: Andy Lutomirski Cc: Will deacon Cc: Marcelo Tosatti Cc: Christian Borntraeger Cc: Luiz Capitulino Cc: Paolo Bonzini Signed-off-by: Frederic Weisbecker --- include/linux/context_tracking_state.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 8076f875c324..ad4458588b47 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -39,6 +39,8 @@ static inline bool context_tracking_in_user(void) #else static inline bool context_tracking_in_user(void) { return false; } static inline bool context_tracking_active(void) { return false; } +static inline bool context_tracking_is_enabled(void) { return false; } +static inline bool context_tracking_cpu_is_enabled(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ #endif -- cgit v1.2.3 From 126a6a542446f1a49b9f3c69237c87df3eb4e6e1 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 10 Feb 2015 15:27:54 -0500 Subject: kvm,rcu,nohz: use RCU extended quiescent state when running KVM guest The host kernel is not doing anything while the CPU is executing a KVM guest VCPU, so it can be marked as being in an extended quiescent state, identical to that used when running user space code. The only exception to that rule is when the host handles an interrupt, which is already handled by the irq code, which calls rcu_irq_enter and rcu_irq_exit. The guest_enter and guest_exit functions already switch vtime accounting independent of context tracking. Leave those calls where they are, instead of moving them into the context tracking code. Reviewed-by: Paul E. McKenney Signed-off-by: Rik van Riel Cc: Paul E. McKenney Cc: Andy Lutomirski Cc: Will deacon Cc: Marcelo Tosatti Cc: Christian Borntraeger Cc: Luiz Capitulino Cc: Paolo Bonzini Signed-off-by: Frederic Weisbecker --- include/linux/context_tracking.h | 6 ++++++ include/linux/context_tracking_state.h | 1 + include/linux/kvm_host.h | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 7f1810a3b5a4..2821838256b4 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -81,10 +81,16 @@ static inline void guest_enter(void) vtime_guest_enter(current); else current->flags |= PF_VCPU; + + if (context_tracking_is_enabled()) + context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { + if (context_tracking_is_enabled()) + context_tracking_exit(CONTEXT_GUEST); + if (vtime_accounting_enabled()) vtime_guest_exit(current); else diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index ad4458588b47..6b7b96a32b75 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -15,6 +15,7 @@ struct context_tracking { enum ctx_state { CONTEXT_KERNEL = 0, CONTEXT_USER, + CONTEXT_GUEST, } state; }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b2104d19b..cc8c61c5459c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -766,7 +766,8 @@ static inline void kvm_guest_enter(void) * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ - rcu_virt_note_context_switch(smp_processor_id()); + if (!context_tracking_cpu_is_enabled()) + rcu_virt_note_context_switch(smp_processor_id()); } static inline void kvm_guest_exit(void) -- cgit v1.2.3 From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Mar 2015 12:30:12 +0100 Subject: netfilter: bridge: move DNAT helper to br_netfilter Only one caller, there is no need to keep this in a header. Move it to br_netfilter.c where this belongs to. Based on patch from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index dd580a9a1add..bb39113ea596 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,18 +44,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) } int br_handle_frame_finish(struct sk_buff *skb); -/* Only used in br_device.c */ -static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - skb_pull(skb, ETH_HLEN); - nf_bridge->mask ^= BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); - skb->dev = nf_bridge->physindev; - return br_handle_frame_finish(skb); -} /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -- cgit v1.2.3 From e39ce48f5362df9f87400b4909a6fb0f51b109ac Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 11 Feb 2015 19:35:27 -0800 Subject: regulator: Rename regulator_set_optimum_mode Rename the regulator_set_optimum_mode() function regulator_set_load() to better represent what's going on. Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d17e1ff7ad01..6d4e9d2289f0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -238,7 +238,7 @@ int regulator_get_current_limit(struct regulator *regulator); int regulator_set_mode(struct regulator *regulator, unsigned int mode); unsigned int regulator_get_mode(struct regulator *regulator); -int regulator_set_optimum_mode(struct regulator *regulator, int load_uA); +int regulator_set_load(struct regulator *regulator, int load_uA); int regulator_allow_bypass(struct regulator *regulator, bool allow); @@ -479,8 +479,7 @@ static inline unsigned int regulator_get_mode(struct regulator *regulator) return REGULATOR_MODE_NORMAL; } -static inline int regulator_set_optimum_mode(struct regulator *regulator, - int load_uA) +static inline int regulator_set_load(struct regulator *regulator, int load_uA) { return REGULATOR_MODE_NORMAL; } @@ -555,4 +554,11 @@ static inline int regulator_is_supported_voltage_tol(struct regulator *regulator target_uV + tol_uV); } +/* TEMP: Wrapper to keep bisectability */ +static inline int regulator_set_optimum_mode(struct regulator *regulator, + int load_uA) +{ + return regulator_set_load(regulator, load_uA); +} + #endif -- cgit v1.2.3 From ae6e808f15742fcbc0097ac2fb3055d553266965 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 11 Feb 2015 19:35:31 -0800 Subject: regulator: Drop temporary regulator_set_optimum_mode wrapper Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 6d4e9d2289f0..d8944f508235 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -554,11 +554,4 @@ static inline int regulator_is_supported_voltage_tol(struct regulator *regulator target_uV + tol_uV); } -/* TEMP: Wrapper to keep bisectability */ -static inline int regulator_set_optimum_mode(struct regulator *regulator, - int load_uA) -{ - return regulator_set_load(regulator, load_uA); -} - #endif -- cgit v1.2.3 From ca1bb4ee4c3a017bb66840d11d5efdf4e8f3f66d Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 23 Feb 2015 16:11:41 -0800 Subject: leds: Introduce devres helper for led_classdev_register (cooloney@gmail.com: add _unregister function into the document) Suggested-by: Stephen Boyd Signed-off-by: Bjorn Andersson Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index f70f84f35674..ed634279062e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -105,7 +105,11 @@ struct led_classdev { extern int led_classdev_register(struct device *parent, struct led_classdev *led_cdev); +extern int devm_led_classdev_register(struct device *parent, + struct led_classdev *led_cdev); extern void led_classdev_unregister(struct led_classdev *led_cdev); +extern void devm_led_classdev_unregister(struct device *parent, + struct led_classdev *led_cdev); extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); -- cgit v1.2.3 From 94fdec768dc72a6993479f59a17daa413f30029e Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 4 Mar 2015 08:14:22 -0800 Subject: leds: flash: Remove synchronized flash strobe feature Synchronized flash strobe feature has been considered not fitting for LED subsystem sysfs interface and thus is being removed. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/led-class-flash.h | 14 -------------- include/linux/leds.h | 1 - 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 3b5b9643cea1..21ec91e13c47 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -81,20 +81,6 @@ struct led_classdev_flash { /* LED Flash class sysfs groups */ const struct attribute_group *sysfs_groups[LED_FLASH_MAX_SYSFS_GROUPS]; - - /* LEDs available for flash strobe synchronization */ - struct led_classdev_flash **sync_leds; - - /* Number of LEDs available for flash strobe synchronization */ - int num_sync_leds; - - /* - * The identifier of the sub-led to synchronize the flash strobe with. - * Identifiers start from 1, which reflects the first element from the - * sync_leds array. 0 means that the flash strobe should not be - * synchronized. - */ - u32 sync_led_id; }; static inline struct led_classdev_flash *lcdev_to_flcdev( diff --git a/include/linux/leds.h b/include/linux/leds.h index ed634279062e..9a2b000094cf 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -47,7 +47,6 @@ struct led_classdev { #define SET_BRIGHTNESS_ASYNC (1 << 21) #define SET_BRIGHTNESS_SYNC (1 << 22) #define LED_DEV_CAP_FLASH (1 << 23) -#define LED_DEV_CAP_SYNC_STROBE (1 << 24) /* Set LED brightness level */ /* Must not sleep, use a workqueue if needed */ -- cgit v1.2.3 From 2f0f267ea0720ec6adbe9cf7386450425fac8258 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Mar 2015 10:49:19 +1030 Subject: cpumask: remove deprecated functions. Using these functions with offstack cpus is unsafe. They use all NR_CPUS bits, unstead of nr_cpumask_bits. In particular, lustre (in staging) used cpus_ and that caused a bug. Reported-by: Oleg Drokin Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 151 ------------------------------------------------ 1 file changed, 151 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index dc037ae6f4f2..646fadee5caf 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -815,155 +815,4 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } -/* - * - * From here down, all obsolete. Use cpumask_ variants! - * - */ -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS - -#define CPU_MASK_CPU0 \ -(cpumask_t) { { \ - [0] = 1UL \ -} } - -#if NR_CPUS == 1 -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#define any_online_cpu(mask) 0 -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#else /* NR_CPUS > 1 */ -int __first_cpu(const cpumask_t *srcp); -int __next_cpu(int n, const cpumask_t *srcp); - -#define first_cpu(src) __first_cpu(&(src)) -#define next_cpu(n, src) __next_cpu((n), &(src)) -#define any_online_cpu(mask) cpumask_any_and(&mask, cpu_online_mask) -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = next_cpu((cpu), (mask)), \ - (cpu) < NR_CPUS; ) -#endif /* SMP */ - -#if NR_CPUS <= 64 - -#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) - -#else /* NR_CPUS > 64 */ - -int __next_cpu_nr(int n, const cpumask_t *srcp); -#define for_each_cpu_mask_nr(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = __next_cpu_nr((cpu), &(mask)), \ - (cpu) < nr_cpu_ids; ) - -#endif /* NR_CPUS > 64 */ - -#define cpus_addr(src) ((src).bits) - -#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) -static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) -{ - set_bit(cpu, dstp->bits); -} - -#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) -static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) -{ - clear_bit(cpu, dstp->bits); -} - -#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) -static inline void __cpus_setall(cpumask_t *dstp, unsigned int nbits) -{ - bitmap_fill(dstp->bits, nbits); -} - -#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) -static inline void __cpus_clear(cpumask_t *dstp, unsigned int nbits) -{ - bitmap_zero(dstp->bits, nbits); -} - -/* No static inline type checking - see Subtlety (1) above. */ -#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) - -#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) -static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) -{ - return test_and_set_bit(cpu, addr->bits); -} - -#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_andnot(dst, src1, src2) \ - __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) -static inline int __cpus_equal(const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - return bitmap_equal(src1p->bits, src2p->bits, nbits); -} - -#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) -static inline int __cpus_intersects(const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - return bitmap_intersects(src1p->bits, src2p->bits, nbits); -} - -#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) -static inline int __cpus_subset(const cpumask_t *src1p, - const cpumask_t *src2p, unsigned int nbits) -{ - return bitmap_subset(src1p->bits, src2p->bits, nbits); -} - -#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) -static inline int __cpus_empty(const cpumask_t *srcp, unsigned int nbits) -{ - return bitmap_empty(srcp->bits, nbits); -} - -#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) -static inline int __cpus_weight(const cpumask_t *srcp, unsigned int nbits) -{ - return bitmap_weight(srcp->bits, nbits); -} - -#define cpus_shift_left(dst, src, n) \ - __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_left(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ - #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From cdfdef75e795fb5ab76c66f3329e509f3ab8b9b5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Mar 2015 10:49:19 +1030 Subject: cpumask: only allocate nr_cpumask_bits. Now we'll find out the hard way if anyone has CPUMASK_OFFSTACK and is returning these or assigning them. Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 646fadee5caf..4ad2d3c8e21f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -11,6 +11,7 @@ #include #include +/* Don't assign or return these: may not be this big! */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** @@ -609,9 +610,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) */ static inline size_t cpumask_size(void) { - /* FIXME: Once all cpumask assignments are eliminated, this - * can be nr_cpumask_bits */ - return BITS_TO_LONGS(NR_CPUS) * sizeof(long); + return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); } /* -- cgit v1.2.3 From aa836df958886e57ff0d43fb3d79d1af4aec0cc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 14:31:20 -0700 Subject: net: core: add of_find_net_device_by_node() Add a helper function which allows getting the struct net_device pointer associated with a given struct device_node pointer. This is useful for instance for DSA Ethernet devices not backed by a platform_device, but a PCI device. Since we need to access net_class which is not accessible outside of net/core/net-sysfs.c, this helper function is also added here and gated with CONFIG_OF_NET. Network devices initialized with SET_NETDEV_DEV() are also taken into account by checking for dev->parent first and then falling back to checking the device pointer within struct net_device. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_net.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 34597c8c1a4c..9cd72aab76fe 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -9,8 +9,11 @@ #ifdef CONFIG_OF_NET #include + +struct net_device; extern int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np) { @@ -21,6 +24,11 @@ static inline const void *of_get_mac_address(struct device_node *np) { return NULL; } + +static inline struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ -- cgit v1.2.3 From f8f2147150de303e814c0452075d467734d3544b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 9 Mar 2015 13:59:09 -0700 Subject: switchdev: add netlink flags to IPv4 FIB add op Pass in the netlink flags (NLM_F_*) into switchdev driver for IPv4 FIB add op to allow driver to 1) optimize hardware updates, 2) handle ip route prepend and append commands correctly. Suggested-by: Jamal Hadi Salim Suggested-by: Roopa Prabhu Signed-off-by: Scott Feldman Reviewed-by: Simon Horman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45413784a3b1..1354ae83efc8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct fib_info; * state change. * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); + * u8 tos, u8 type, u32 nlflags, u32 tb_id); * Called to add/modify IPv4 route to switch device. * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, @@ -1207,6 +1207,7 @@ struct net_device_ops { int dst_len, struct fib_info *fi, u8 tos, u8 type, + u32 nlflags, u32 tb_id); int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst, -- cgit v1.2.3 From 59e33c2b021322db92ca27c4d9958e57630443b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 15:44:13 -0700 Subject: net: phy: bcm7xxx: add alternate id for 7439 BCM7439 has an alternate PHY OUI: 0xae025080 which is to be found in some variants of this chip. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928cc1f2..cab606617522 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -19,6 +19,7 @@ #define PHY_ID_BCM7425 0x03625e60 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 +#define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 #define PHY_BCM_OUI_MASK 0xfffffc00 -- cgit v1.2.3 From 548ef28449c0c06f92194c40ff0eaed248cb4b75 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 24 Feb 2015 21:29:25 +0100 Subject: KVM: Get rid of kvm_kvfree() kvm_kvfree() provides exactly the same functionality as the new common kvfree() function - so let's simply replace the kvm function with the common function. Signed-off-by: Thomas Huth Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b2104d19b..0f574ebc82f4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -658,7 +658,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); void *kvm_kvzalloc(unsigned long size); -void kvm_kvfree(const void *addr); #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) -- cgit v1.2.3 From 491da2a477077357c8206a601559e2ea58f224db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:52 -0700 Subject: net: constify sock_diag_check_cookie() sock_diag_check_cookie() second parameter is constant Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 46cca4c06848..b5ad7d35a636 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,7 +19,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(void *sk, const __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -- cgit v1.2.3 From 34160ea3f9c96b5ae71a11459f9b9f6c298b8930 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:54 -0700 Subject: inet_diag: add const to inet_diag_req_v2 diag dumpers should not modify the request. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 46da02410a09..ac48b10c9395 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -11,33 +11,34 @@ struct sk_buff; struct netlink_callback; struct inet_diag_handler { - void (*dump)(struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - struct nlattr *bc); - - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); - - void (*idiag_get_info)(struct sock *sk, - struct inet_diag_msg *r, - void *info); - __u16 idiag_type; + void (*dump)(struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); + + int (*dump_one)(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); + + void (*idiag_get_info)(struct sock *sk, + struct inet_diag_msg *r, + void *info); + __u16 idiag_type; }; struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + struct sk_buff *skb, const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, - struct nlattr *bc); + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); + struct sk_buff *in_skb, const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From adf9c3c85615f41c08559086e0d9ecdc6cc9db71 Mon Sep 17 00:00:00 2001 From: Joseph Kogut Date: Mon, 16 Feb 2015 19:32:46 -0700 Subject: usb: move definition of PCI_VENDOR_ID_SYNOPSYS to linux/pci_ids.h Removed FIXME from usb/dwc3/dwc3-pci.c by moving definition of PCI_VENDOR_ID_SYNOPSYS shared with usb/dwc2 to linux/pci_ids.h. Signed-off-by: Joseph Kogut Signed-off-by: Felipe Balbi --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..38cff8f6716d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2315,6 +2315,8 @@ #define PCI_VENDOR_ID_CENATEK 0x16CA #define PCI_DEVICE_ID_CENATEK_IDE 0x0001 +#define PCI_VENDOR_ID_SYNOPSYS 0x16c3 + #define PCI_VENDOR_ID_VITESSE 0x1725 #define PCI_DEVICE_ID_VITESSE_VSC7174 0x7174 -- cgit v1.2.3 From f563d230903210acc2336af58e422216b68ded76 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 3 Mar 2015 10:52:23 +0100 Subject: usb: gadget: composite: add req_match method to usb_function Non-standard requests can encode the actual interface number in a non-standard way. For example composite_setup() assumes that it is w_index && 0xFF, but the printer function encodes the interface number in a context-dependet way (either w_index or w_index >> 8). This can lead to such requests being directed to wrong functions. This patch adds req_match() method to usb_function. Its purpose is to verify that a given request can be handled by a given function. If any function within a configuration provides the method and it returns true, then it is assumed that the right function is found. If a function uses req_match(), it should try as hard as possible to determine if the request is meant for it. If no functions in a configuration provide req_match or none of them returns true, then fall back to the usual approach. Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Felipe Balbi --- include/linux/usb/composite.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 3d87defcc527..2511469a9904 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -148,6 +148,7 @@ struct usb_os_desc_table { * @disable: (REQUIRED) Indicates the function should be disabled. Reasons * include host resetting or reconfiguring the gadget, and disconnection. * @setup: Used for interface-specific control requests. + * @req_match: Tests if a given class request can be handled by this function. * @suspend: Notifies functions when the host stops sending USB traffic. * @resume: Notifies functions when the host restarts USB traffic. * @get_status: Returns function status as a reply to @@ -213,6 +214,8 @@ struct usb_function { void (*disable)(struct usb_function *); int (*setup)(struct usb_function *, const struct usb_ctrlrequest *); + bool (*req_match)(struct usb_function *, + const struct usb_ctrlrequest *); void (*suspend)(struct usb_function *); void (*resume)(struct usb_function *); -- cgit v1.2.3 From 06ed0de5188c9ef6553b2824b6cdd767ad46ece5 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 10 Mar 2015 22:37:46 +0900 Subject: usb: gadget: Fix typo fond in Documentation/Docbook/gadget.xml This patch fix some spelling typo found in gadget.xml. It is because this file is generated from comments in sources, I had to fix comments in the source, instead of xml file itself. Signed-off-by: Masanari Iida Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e2f00fd8cd47..02476b3a1109 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -190,7 +190,7 @@ struct usb_ep { * @ep:the endpoint being configured * @maxpacket_limit:value of maximum packet size limit * - * This function shoud be used only in UDC drivers to initialize endpoint + * This function should be used only in UDC drivers to initialize endpoint * (usually in probe function). */ static inline void usb_ep_set_maxpacket_limit(struct usb_ep *ep, -- cgit v1.2.3 From 916f743da3546c28a2f350d197e3bea95d97ba15 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 26 Feb 2015 15:49:09 -0600 Subject: firmware: qcom: scm: Move the scm driver to drivers/firmware Architectural changes in the ARM Linux kernel tree mandate the eventual removal of the mach-* directories. Move the scm driver to drivers/firmware and the scm header to include/linux to support that removal. Signed-off-by: Kumar Gala --- include/linux/qcom_scm.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/qcom_scm.h (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h new file mode 100644 index 000000000000..6bb84cffb396 --- /dev/null +++ b/include/linux/qcom_scm.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2010, Code Aurora Forum. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __QCOM_SCM_H +#define __QCOM_SCM_H + +#define QCOM_SCM_FLAG_COLDBOOT_CPU1 0x01 +#define QCOM_SCM_FLAG_COLDBOOT_CPU2 0x08 +#define QCOM_SCM_FLAG_COLDBOOT_CPU3 0x20 +#define QCOM_SCM_FLAG_WARMBOOT_CPU0 0x04 +#define QCOM_SCM_FLAG_WARMBOOT_CPU1 0x02 +#define QCOM_SCM_FLAG_WARMBOOT_CPU2 0x10 +#define QCOM_SCM_FLAG_WARMBOOT_CPU3 0x40 + +extern int qcom_scm_set_boot_addr(u32 addr, int flags); + +#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) + +extern u32 qcom_scm_get_version(void); + +#endif -- cgit v1.2.3 From a353e4a06f24235138d483a2625726a5fc472949 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Mon, 2 Mar 2015 16:30:28 -0700 Subject: firmware: qcom: scm: Clean cold boot entry to export only the API We dont need to export the SCM specific cold boot flags to the platform code. Export only a function to set the cold boot address. Signed-off-by: Lina Iyer Signed-off-by: Kumar Gala --- include/linux/qcom_scm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6bb84cffb396..68a1d8801c6f 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -12,15 +12,12 @@ #ifndef __QCOM_SCM_H #define __QCOM_SCM_H -#define QCOM_SCM_FLAG_COLDBOOT_CPU1 0x01 -#define QCOM_SCM_FLAG_COLDBOOT_CPU2 0x08 -#define QCOM_SCM_FLAG_COLDBOOT_CPU3 0x20 #define QCOM_SCM_FLAG_WARMBOOT_CPU0 0x04 #define QCOM_SCM_FLAG_WARMBOOT_CPU1 0x02 #define QCOM_SCM_FLAG_WARMBOOT_CPU2 0x10 #define QCOM_SCM_FLAG_WARMBOOT_CPU3 0x40 -extern int qcom_scm_set_boot_addr(u32 addr, int flags); +extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); #define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) -- cgit v1.2.3 From 2ce76a6ad32fa076a2bb5561e859c97fceec8bb1 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Mon, 2 Mar 2015 16:30:29 -0700 Subject: firmware: qcom: scm: Add qcom_scm_set_warm_boot_addr function A core can be powered down for cpuidle or when it is hotplugged off. In either case, the warmboot return address would be different. Allow setting the warmboot address for a specific cpu, optimize and write to the firmware, if the address is different than the previously set address. Export qcom_scm_set_warm_boot_addr function move the warm boot flags to implementation. Signed-off-by: Lina Iyer Signed-off-by: Kumar Gala --- include/linux/qcom_scm.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 68a1d8801c6f..95ef72a47b0f 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,5 @@ /* Copyright (c) 2010, Code Aurora Forum. All rights reserved. + * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -12,12 +13,8 @@ #ifndef __QCOM_SCM_H #define __QCOM_SCM_H -#define QCOM_SCM_FLAG_WARMBOOT_CPU0 0x04 -#define QCOM_SCM_FLAG_WARMBOOT_CPU1 0x02 -#define QCOM_SCM_FLAG_WARMBOOT_CPU2 0x10 -#define QCOM_SCM_FLAG_WARMBOOT_CPU3 0x40 - extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); +extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); #define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) -- cgit v1.2.3 From 767b0235dd476596c0d4154839ae6880bec71b3c Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Mon, 2 Mar 2015 16:30:30 -0700 Subject: firmware: qcom: scm: Support cpu power down through SCM Support powering down the calling cpu, by trapping into SCM. This termination function triggers the ARM cpu to execute WFI instruction, causing the power controller to safely power the cpu down. Caches may be flushed before powering down the cpu. If cache controller is set to turn off when the cpu is powered down, then the flags argument indicates to the secure mode to flush its cache lines before executing WFI.The warm boot reset address for the cpu should be set before the calling into this function for the cpu to resume. The original code for the qcom_scm_call_atomic1() comes from a patch by Stephen Boyd [1]. The function scm_call_atomic1() has been cherry picked and renamed to match the convention used in this file. Since there are no users of scm_call_atomic2(), the function is not included. [1]. https://lkml.org/lkml/2014/8/4/765 Signed-off-by: Stephen Boyd Signed-off-by: Lina Iyer Signed-off-by: Kumar Gala --- include/linux/qcom_scm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 95ef72a47b0f..d7a974d5f57c 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010, Code Aurora Forum. All rights reserved. +/* Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify @@ -16,6 +16,11 @@ extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); +#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 +#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 + +extern void qcom_scm_cpu_power_down(u32 flags); + #define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) extern u32 qcom_scm_get_version(void); -- cgit v1.2.3 From 8038dad7e888581266c76df15d70ca457a3c5910 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 25 Feb 2015 10:34:39 -0800 Subject: smpboot: Add common code for notification from dying CPU RCU ignores offlined CPUs, so they cannot safely run RCU read-side code. (They -can- use SRCU, but not RCU.) This means that any use of RCU during or after the call to arch_cpu_idle_dead(). Unfortunately, commit 2ed53c0d6cc99 added a complete() call, which will contain RCU read-side critical sections if there is a task waiting to be awakened. Which, as it turns out, there almost never is. In my qemu/KVM testing, the to-be-awakened task is not yet asleep more than 99.5% of the time. In current mainline, failure is even harder to reproduce, requiring a virtualized environment that delays the outgoing CPU by at least three jiffies between the time it exits its stop_machine() task at CPU_DYING time and the time it calls arch_cpu_idle_dead() from the idle loop. However, this problem really can occur, especially in virtualized environments, and therefore really does need to be fixed This suggests moving back to the polling loop, but using a much shorter wait, with gentle exponential backoff instead of the old 100-millisecond wait. Most of the time, the loop will exit without waiting at all, and almost all of the remaining uses will wait only five microseconds. If the outgoing CPU is preempted, a loop will wait one jiffy, then increase the wait by a factor of 11/10ths, rounding up. As before, there is a five-second timeout. This commit therefore provides common-code infrastructure to do the dying-to-surviving CPU handoff in a safe manner. This code also provides an indication at CPU-online of whether the CPU to be onlined previously timed out on offline. The new cpu_check_up_prepare() function returns -EBUSY if this CPU previously took more than five seconds to go offline, or -EAGAIN if it has not yet managed to go offline. The rationale for -EAGAIN is that it might still be preempted, so an additional wait might well find it correctly offlined. Architecture-specific code can decide how to handle these conditions. Systems in which CPUs take themselves completely offline might respond to an -EBUSY return as if it was a zero (success) return. Systems in which the surviving CPU must take some action might take it at this time, or might simply mark the other CPU as unusable. Note that architectures that take the easy way out and simply pass the -EBUSY and -EAGAIN upwards will change the sysfs API. Signed-off-by: Paul E. McKenney Cc: Cc: [ paulmck: Fixed state machine for architectures that don't check earlier CPU-hotplug results as suggested by James Hogan. ] --- include/linux/cpu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4260e8594bd7..4744ef915acd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -95,6 +95,8 @@ enum { * Called on the new cpu, just before * enabling interrupts. Must not sleep, * must not fail */ +#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, + * perhaps due to preemption. */ /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend * operation in progress @@ -271,4 +273,14 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void arch_cpu_idle_dead(void); +DECLARE_PER_CPU(bool, cpu_dead_idle); + +int cpu_report_state(int cpu); +int cpu_check_up_prepare(int cpu); +void cpu_set_state_online(int cpu); +#ifdef CONFIG_HOTPLUG_CPU +bool cpu_wait_death(unsigned int cpu, int seconds); +bool cpu_report_death(void); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #endif /* _LINUX_CPU_H_ */ -- cgit v1.2.3 From 988dfbd795cf08b00576c1ced4210281b2bccffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Mar 2015 09:27:55 +1100 Subject: rhashtable: Move hash_rnd into bucket_table Currently hash_rnd is a parameter that users can set. However, no existing users set this parameter. It is also something that people are unlikely to want to set directly since it's just a random number. In preparation for allowing the reseeding/rehashing of rhashtable, this patch moves hash_rnd into bucket_table so that it's now an internal state rather than a parameter. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d438eeb08bff..5ef8ea551556 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,12 +49,14 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets */ struct bucket_table { size_t size; + u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -72,7 +74,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking * @nulls_base: Base value to generate nulls marker @@ -85,7 +86,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - u32 hash_rnd; size_t max_shift; size_t min_shift; u32 nulls_base; -- cgit v1.2.3 From 33d6737761ea425d43f3b6e4561573a4020982f2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Mar 2015 16:57:11 -0700 Subject: of: mdio: export of_mdio_parse_addr Export of_mdio_parse_addr() which allows parsing a given Ethernet PHY node MDIO address, verify it is within the allowed range, and return its value. This is going to be useful for the DSA code which needs to deal with multiple layers of MDIO buses. Signed-off-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index d449018d0726..8f2237eb3485 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -24,6 +24,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) @@ -60,6 +61,12 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } + +static inline int of_mdio_parse_addr(struct device *dev, + const struct device_node *np) +{ + return -ENOSYS; +} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.2.3 From d09957fbb4d0b059b3176b510540df69048ad170 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Mar 2015 17:48:37 +0100 Subject: mtd: cfi: reduce stack size The cfi_staa_write_buffers function uses a large amount of kernel stack whenever CONFIG_MTD_MAP_BANK_WIDTH_32 is set, and that results in a warning on ARM allmodconfig builds: drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_write_buffers': drivers/mtd/chips/cfi_cmdset_0020.c:651:1: warning: the frame size of 1208 bytes is larger than 1024 bytes [-Wframe-larger-than=] It turns out that this is largely a result of a suboptimal implementation of map_word_andequal(). Replacing this function with a straightforward one reduces the stack size in this function by exactly 200 bytes, shrinks the .text segment for this file from 27648 bytes to 26608 bytes, and makes the warning go away. Signed-off-by: Arnd Bergmann Signed-off-by: Brian Norris --- include/linux/mtd/map.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 5f487d776411..a872157a0700 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -314,7 +314,17 @@ static inline map_word map_word_or(struct map_info *map, map_word val1, map_word return r; } -#define map_word_andequal(m, a, b, z) map_word_equal(m, z, map_word_and(m, a, b)) +static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) +{ + int i; + + for (i = 0; i < map_words(map); i++) { + if ((val1.x[i] & val2.x[i]) != val3.x[i]) + return 0; + } + + return 1; +} static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) { -- cgit v1.2.3 From 7234bea69de200e2060d099685c4c674b556edc0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Mar 2015 17:51:55 +0100 Subject: mtd: clean up whitespace in linux/mtd/map.h As the only comments I got for the "mtd: cfi: reduce stack size" patch were about whitespace changes, it appears necessary to fix up the rest of the file as well, which contains the exact same mistakes. Signed-off-by: Arnd Bergmann Signed-off-by: Brian Norris --- include/linux/mtd/map.h | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index a872157a0700..29975c73a953 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -77,7 +77,7 @@ /* ensure we never evaluate anything shorted than an unsigned long * to zero, and ensure we'll never miss the end of an comparison (bjd) */ -#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1))/ sizeof(unsigned long)) +#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1)) / sizeof(unsigned long)) #ifdef CONFIG_MTD_MAP_BANK_WIDTH_8 # ifdef map_bankwidth @@ -181,7 +181,7 @@ static inline int map_bankwidth_supported(int w) } } -#define MAX_MAP_LONGS ( ((MAX_MAP_BANKWIDTH*8) + BITS_PER_LONG - 1) / BITS_PER_LONG ) +#define MAX_MAP_LONGS (((MAX_MAP_BANKWIDTH * 8) + BITS_PER_LONG - 1) / BITS_PER_LONG) typedef union { unsigned long x[MAX_MAP_LONGS]; @@ -264,20 +264,22 @@ void unregister_mtd_chip_driver(struct mtd_chip_driver *); struct mtd_info *do_map_probe(const char *name, struct map_info *map); void map_destroy(struct mtd_info *mtd); -#define ENABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 1); } while(0) -#define DISABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 0); } while(0) +#define ENABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 1); } while (0) +#define DISABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 0); } while (0) #define INVALIDATE_CACHED_RANGE(map, from, size) \ - do { if(map->inval_cache) map->inval_cache(map, from, size); } while(0) + do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) { int i; - for (i=0; ivirt + ofs); #endif else if (map_bankwidth_is_large(map)) - memcpy_fromio(r.x, map->virt+ofs, map->bankwidth); + memcpy_fromio(r.x, map->virt + ofs, map->bankwidth); else BUG(); -- cgit v1.2.3 From 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:53:14 -0700 Subject: net: add real socket cookies A long standing problem in netlink socket dumps is the use of kernel socket addresses as cookies. 1) It is a security concern. 2) Sockets can be reused quite quickly, so there is no guarantee a cookie is used once and identify a flow. 3) request sock, establish sock, and timewait socks for a given flow have different cookies. Part of our effort to bring better TCP statistics requires to switch to a different allocator. In this patch, I chose to use a per network namespace 64bit generator, and to use it only in the case a socket needs to be dumped to netlink. (This might be refined later if needed) Note that I tried to carry cookies from request sock, to establish sock, then timewait sockets. Signed-off-by: Eric Dumazet Cc: Eric Salo Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index b5ad7d35a636..083ac388098e 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, const __u32 *cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, -- cgit v1.2.3 From 2c6e0277e1eab3df5db81c59e408b7b1c14b1b72 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 11 Mar 2015 17:26:41 -0500 Subject: HID: multitouch: Add support for button type usage According to [1], Windows Precision Touchpad devices must supply a button type usage in the device capabilities feature report. A value of 0 indicates that the device contains a depressible button (i.e. it's a click-pad) whereas a value of 1 indicates a non-depressible button. Add support for this usage and set INPUT_PROP_BUTTONPAD on the touchpad input device whenever a depressible button is present. [1] https://msdn.microsoft.com/en-us/library/windows/hardware/dn467314(v=vs.85).aspx Signed-off-by: Seth Forshee Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index efc7787a41a8..f455c38d7562 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -269,6 +269,7 @@ struct hid_item { #define HID_DG_DEVICEINDEX 0x000d0053 #define HID_DG_CONTACTCOUNT 0x000d0054 #define HID_DG_CONTACTMAX 0x000d0055 +#define HID_DG_BUTTONTYPE 0x000d0059 #define HID_DG_BARRELSWITCH2 0x000d005a #define HID_DG_TOOLSERIALNUMBER 0x000d005b -- cgit v1.2.3 From 315491e5d6ee66838a18a8ca0c14e6ffb376e48c Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 9 Jan 2015 15:21:58 -0600 Subject: remoteproc: add IOMMU hardware capability flag The remoteproc framework currently relies on iommu_present() on the bus the device is on, to perform MMU management. However, this logic doesn't scale for multi-arch, especially for processors that do not have an IOMMU. Replace this logic instead by using a h/w capability flag for the presence of IOMMU in the rproc structure. This issue is seen on OMAP platforms when trying to add a remoteproc driver for a small Cortex M3 called the WkupM3 used for suspend / resume management on TI AM335/AM437x SoCs. This processor does not have an MMU. Same is the case with another processor subsystem PRU-ICSS on AM335/AM437x. All these are platform devices, and the current iommu_present check will not scale for the same kernel image to support OMAP4/OMAP5 and AM335/AM437x. The existing platform implementation drivers - OMAP remoteproc, STE Modem remoteproc and DA8xx remoteproc, are updated as well to properly configure the newly added rproc field. Cc: Robert Tivy Cc: Linus Walleij Signed-off-by: Suman Anna [small change in the commit title and in a single comment] Signed-off-by: Ohad Ben-Cohen --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 9e7e745dac55..78b8a9b9d40a 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -404,6 +404,7 @@ enum rproc_crash_type { * @table_ptr: pointer to the resource table in effect * @cached_table: copy of the resource table * @table_csum: checksum of the resource table + * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { struct klist_node node; @@ -435,6 +436,7 @@ struct rproc { struct resource_table *table_ptr; struct resource_table *cached_table; u32 table_csum; + bool has_iommu; }; /* we currently support only two vrings per rvdev */ -- cgit v1.2.3 From e19f742885e87ab9582fdf5940f214419eb9275b Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Sat, 28 Feb 2015 18:09:06 +0800 Subject: mfd: rk808: Disable the under voltage detect Rk808 has a under voltage detect function, when the voltage of buck is under 85% the target voltage, the buck output will reset. But if the power load is too heavy, this function maybe err, when current over 4.2A, although the voltage is normal, but RK808 mistakenly think it is under 85%, and reset the buck. So let's disable this function. Signed-off-by: Chris Zhong Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index fb09312d854b..441b6ee72691 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -156,6 +156,9 @@ enum rk808_reg { #define BUCK2_RATE_MASK (3 << 3) #define MASK_ALL 0xff +#define BUCK_UV_ACT_MASK 0x0f +#define BUCK_UV_ACT_DISABLE 0 + #define SWITCH2_EN BIT(6) #define SWITCH1_EN BIT(5) #define DEV_OFF_RST BIT(3) -- cgit v1.2.3 From 3be62b957d2c02cf744d3845091dd8a5fdca2039 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 4 Mar 2015 17:14:29 +0100 Subject: mfd: max77693: Adjust FLASH_EN_SHIFT and TORCH_EN_SHIFT macros Modify FLASH_EN_SHIFT and TORCH_EN_SHIFT macros to work properly when passed enum max77693_fled values (0 for FLED1 and 1 for FLED2) from leds-max77693 driver. Previous definitions were compatible with one of the previous RFC versions of leds-max77693.c driver, which was not merged. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 955dd990beaf..5acd7104ad34 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -113,8 +113,8 @@ enum max77693_pmic_reg { #define FLASH_EN_FLASH 0x1 #define FLASH_EN_TORCH 0x2 #define FLASH_EN_ON 0x3 -#define FLASH_EN_SHIFT(x) (6 - ((x) - 1) * 2) -#define TORCH_EN_SHIFT(x) (2 - ((x) - 1) * 2) +#define FLASH_EN_SHIFT(x) (6 - (x) * 2) +#define TORCH_EN_SHIFT(x) (2 - (x) * 2) /* MAX77693 MAX_FLASH1 register */ #define MAX_FLASH1_MAX_FL_EN 0x80 -- cgit v1.2.3 From a307de2aab5bcb96bae3d778b01ff815f027ad88 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 4 Mar 2015 17:14:27 +0100 Subject: mfd: max77693: Remove struct max77693_led_platform_data The flash part of the max77693 device will depend only on OF, and thus will not use board files. Since there are no other users of the struct max77693_led_platform_data its existence is unjustified. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Signed-off-by: Lee Jones --- include/linux/mfd/max77693.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h index f0b6585cd874..ce894b6e6315 100644 --- a/include/linux/mfd/max77693.h +++ b/include/linux/mfd/max77693.h @@ -87,19 +87,6 @@ enum max77693_led_boost_mode { MAX77693_LED_BOOST_FIXED, }; -struct max77693_led_platform_data { - u32 fleds[2]; - u32 iout_torch[2]; - u32 iout_flash[2]; - u32 trigger[2]; - u32 trigger_type[2]; - u32 num_leds; - u32 boost_mode; - u32 flash_timeout; - u32 boost_vout; - u32 low_vsys; -}; - /* MAX77693 */ struct max77693_platform_data { -- cgit v1.2.3 From 419d55bbb80370ed1e8a36d7884cfcf977e73e29 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 4 Mar 2015 17:14:28 +0100 Subject: mfd: max77693: Add TORCH_IOUT_MASK macro Add a macro for obtaining the mask of ITORCH register bit fields related either to FLED1 or FLED2 current output. The expected arguments are TORCH_IOUT1_SHIFT or TORCH_IOUT2_SHIFT. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Signed-off-by: Lee Jones --- include/linux/mfd/max77693-private.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 5acd7104ad34..51633ea6f910 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -87,6 +87,7 @@ enum max77693_pmic_reg { /* MAX77693 ITORCH register */ #define TORCH_IOUT1_SHIFT 0 #define TORCH_IOUT2_SHIFT 4 +#define TORCH_IOUT_MASK(x) (0xf << (x)) #define TORCH_IOUT_MIN 15625 #define TORCH_IOUT_MAX 250000 #define TORCH_IOUT_STEP 15625 -- cgit v1.2.3 From 2698dc22292e3e5fc2b24b2748018dcc09d70989 Mon Sep 17 00:00:00 2001 From: Gyungoh Yoo Date: Fri, 27 Feb 2015 15:42:21 +0900 Subject: mfd: Add support for Skyworks SKY81452 driver Signed-off-by: Gyungoh Yoo Signed-off-by: Lee Jones --- include/linux/mfd/sky81452.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/mfd/sky81452.h (limited to 'include/linux') diff --git a/include/linux/mfd/sky81452.h b/include/linux/mfd/sky81452.h new file mode 100644 index 000000000000..b0925fa3e9ef --- /dev/null +++ b/include/linux/mfd/sky81452.h @@ -0,0 +1,31 @@ +/* + * sky81452.h SKY81452 MFD driver + * + * Copyright 2014 Skyworks Solutions Inc. + * Author : Gyungoh Yoo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef _SKY81452_H +#define _SKY81452_H + +#include +#include + +struct sky81452_platform_data { + struct sky81452_bl_platform_data *bl_pdata; + struct regulator_init_data *regulator_init_data; +}; + +#endif -- cgit v1.2.3 From f705806c9f355fc63911dea72a65d8eeff0c2586 Mon Sep 17 00:00:00 2001 From: Gyungoh Yoo Date: Fri, 27 Feb 2015 15:42:22 +0900 Subject: backlight: Add support Skyworks SKY81452 backlight driver Signed-off-by: Gyungoh Yoo Acked-by: Jingoo Han Acked-by: Bryan Wu Signed-off-by: Lee Jones --- include/linux/platform_data/sky81452-backlight.h | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/linux/platform_data/sky81452-backlight.h (limited to 'include/linux') diff --git a/include/linux/platform_data/sky81452-backlight.h b/include/linux/platform_data/sky81452-backlight.h new file mode 100644 index 000000000000..1231e9bb00f1 --- /dev/null +++ b/include/linux/platform_data/sky81452-backlight.h @@ -0,0 +1,46 @@ +/* + * sky81452.h SKY81452 backlight driver + * + * Copyright 2014 Skyworks Solutions Inc. + * Author : Gyungoh Yoo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef _SKY81452_BACKLIGHT_H +#define _SKY81452_BACKLIGHT_H + +/** + * struct sky81452_platform_data + * @name: backlight driver name. + If it is not defined, default name is lcd-backlight. + * @gpio_enable:GPIO number which control EN pin + * @enable: Enable mask for current sink channel 1, 2, 3, 4, 5 and 6. + * @ignore_pwm: true if DPWMI should be ignored. + * @dpwm_mode: true is DPWM dimming mode, otherwise Analog dimming mode. + * @phase_shift:true is phase shift mode. + * @short_detecion_threshold: It should be one of 4, 5, 6 and 7V. + * @boost_current_limit: It should be one of 2300, 2750mA. + */ +struct sky81452_bl_platform_data { + const char *name; + int gpio_enable; + unsigned int enable; + bool ignore_pwm; + bool dpwm_mode; + bool phase_shift; + unsigned int short_detection_threshold; + unsigned int boost_current_limit; +}; + +#endif -- cgit v1.2.3 From fb82fe2fe8588745edd73aa3a6229facac5c1e15 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:31 -0700 Subject: clocksource: Add 'max_cycles' to 'struct clocksource' In order to facilitate clocksource validation, add a 'max_cycles' field to the clocksource structure which will hold the maximum cycle value that can safely be multiplied without potentially causing an overflow. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9c78d15d33e4..16d048cadebb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -56,6 +56,7 @@ struct module; * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) * @maxadj: maximum adjustment value to mult (~11%) + * @max_cycles: maximum safe cycle value which won't overflow on multiplication * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary @@ -76,7 +77,7 @@ struct clocksource { #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif - + u64 max_cycles; const char *name; struct list_head list; int rating; @@ -189,7 +190,7 @@ extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern u64 -clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); +clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); extern void clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); -- cgit v1.2.3 From 01c94e64f5a6f298774bdbde435e577821119fc0 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:33 +0100 Subject: KVM: introduce kvm_arch_intc_initialized and use it in irqfd Introduce __KVM_HAVE_ARCH_INTC_INITIALIZED define and associated kvm_arch_intc_initialized function. This latter allows to test whether the virtual interrupt controller is initialized and ready to accept virtual IRQ injection. On some architectures, the virtual interrupt controller is dynamically instantiated, justifying that kind of check. The new function can now be used by irqfd to check whether the virtual interrupt controller is ready on KVM_IRQFD request. If not, KVM_IRQFD returns -EAGAIN. Signed-off-by: Eric Auger Acked-by: Christoffer Dall Reviewed-by: Andre Przywara Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b2104d19b..ae9c72012004 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -700,6 +700,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) #endif } +#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED +/* + * returns true if the virtual interrupt controller is initialized and + * ready to accept virtual IRQ. On some architectures the virtual interrupt + * controller is dynamically instantiated and this is not always true. + */ +bool kvm_arch_intc_initialized(struct kvm *kvm); +#else +static inline bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return true; +} +#endif + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); -- cgit v1.2.3 From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:04:08 -0500 Subject: net: Kill hold_net release_net hold_net and release_net were an idea that turned out to be useless. The code has been disabled since 2008. Kill the code it is long past due. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1354ae83efc8..cede40d9cac9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1864,8 +1864,7 @@ static inline void dev_net_set(struct net_device *dev, struct net *net) { #ifdef CONFIG_NET_NS - release_net(dev->nd_net); - dev->nd_net = hold_net(net); + dev->nd_net = net; #endif } -- cgit v1.2.3 From 0c5c9fb55106333e773de8c9dd321fa8240caeb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:06:44 -0500 Subject: net: Introduce possible_net_t Having to say > #ifdef CONFIG_NET_NS > struct net *net; > #endif in structures is a little bit wordy and a little bit error prone. Instead it is possible to say: > typedef struct { > #ifdef CONFIG_NET_NS > struct net *net; > #endif > } possible_net_t; And then in a header say: > possible_net_t net; Which is cleaner and easier to use and easier to test, as the possible_net_t is always there no matter what the compile options. Further this allows read_pnet and write_pnet to be functions in all cases which is better at catching typos. This change adds possible_net_t, updates the definitions of read_pnet and write_pnet, updates optional struct net * variables that write_pnet uses on to have the type possible_net_t, and finally fixes up the b0rked users of read_pnet and write_pnet. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cede40d9cac9..ddab1a2a07a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1721,9 +1721,7 @@ struct net_device { struct netpoll_info __rcu *npinfo; #endif -#ifdef CONFIG_NET_NS - struct net *nd_net; -#endif + possible_net_t nd_net; /* mid-layer private */ union { @@ -1863,9 +1861,7 @@ struct net *dev_net(const struct net_device *dev) static inline void dev_net_set(struct net_device *dev, struct net *net) { -#ifdef CONFIG_NET_NS - dev->nd_net = net; -#endif + write_pnet(&dev->nd_net, net); } static inline bool netdev_uses_dsa(struct net_device *dev) -- cgit v1.2.3 From 80f1d68ccba70b1060c9c7360ca83da430f66bed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 17:21:42 +0100 Subject: ebpf: verifier: check that call reg with ARG_ANYTHING is initialized I noticed that a helper function with argument type ARG_ANYTHING does not need to have an initialized value (register). This can worst case lead to unintented stack memory leakage in future helper functions if they are not carefully designed, or unintended application behaviour in case the application developer was not careful enough to match a correct helper function signature in the API. The underlying issue is that ARG_ANYTHING should actually be split into two different semantics: 1) ARG_DONTCARE for function arguments that the helper function does not care about (in other words: the default for unused function arguments), and 2) ARG_ANYTHING that is an argument actually being used by a helper function and *guaranteed* to be an initialized register. The current risk is low: ARG_ANYTHING is only used for the 'flags' argument (r4) in bpf_map_update_elem() that internally does strict checking. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a884f5a2c503..80f2e0fc3d02 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -44,7 +44,7 @@ struct bpf_map_type_list { /* function argument constraints */ enum bpf_arg_type { - ARG_ANYTHING = 0, /* any argument is ok */ + ARG_DONTCARE = 0, /* unused argument in helper function */ /* the following constraints used to prototype * bpf_map_lookup/update/delete_elem() functions @@ -58,6 +58,8 @@ enum bpf_arg_type { */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + + ARG_ANYTHING, /* any (initialized) argument is ok */ }; /* type of values returned from helper functions */ -- cgit v1.2.3 From 88428cc5c27c63a4313e213813bc39b9899224d5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jan 2015 14:42:09 -0800 Subject: rcu: Handle outgoing CPUs on exit from idle loop This commit informs RCU of an outgoing CPU just before that CPU invokes arch_cpu_idle_dead() during its last pass through the idle loop (via a new CPU_DYING_IDLE notifier value). This change means that RCU need not deal with outgoing CPUs passing through the scheduler after informing RCU that they are no longer online. Note that removing the CPU from the rcu_node ->qsmaskinit bit masks is done at CPU_DYING_IDLE time, and orphaning callbacks is still done at CPU_DEAD time, the reason being that at CPU_DEAD time we have another CPU that can adopt them. Signed-off-by: Paul E. McKenney --- include/linux/cpu.h | 2 ++ include/linux/rcupdate.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4744ef915acd..d028721748d4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -95,6 +95,8 @@ enum { * Called on the new cpu, just before * enabling interrupts. Must not sleep, * must not fail */ +#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached + * idle loop. */ #define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly, * perhaps due to preemption. */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 78097491cd99..762022f07afd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -266,6 +266,8 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); -- cgit v1.2.3 From a5b6846f9e1a080493210013385c28faecee36f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 15:28:40 +0100 Subject: rhashtable: kill ht->shift atomic operations Commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") changed ht->shift to be atomic, which is actually unnecessary. Instead of leaving the current shift in the core rhashtable structure, it can be cached inside the individual bucket tables. There, it will only be initialized once during a new table allocation in the shrink/expansion slow path, and from then onward it stays immutable for the rest of the bucket table liftime. That allows shift to be non-atomic. The patch also moves hash_rnd management into the table setup. The rhashtable structure now consumes 3 instead of 4 cachelines. Signed-off-by: Daniel Borkmann Cc: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5ef8ea551556..c93ff8ac474a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -50,6 +50,7 @@ struct rhash_head { * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @hash_rnd: Random seed to fold into hash + * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets @@ -57,6 +58,7 @@ struct rhash_head { struct bucket_table { size_t size; u32 hash_rnd; + u32 shift; unsigned int locks_mask; spinlock_t *locks; @@ -99,7 +101,6 @@ struct rhashtable_params { * @tbl: Bucket table * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table - * @shift: Current size (1 << shift) * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -110,12 +111,11 @@ struct rhashtable { struct bucket_table __rcu *tbl; struct bucket_table __rcu *future_tbl; atomic_t nelems; - atomic_t shift; + bool being_destroyed; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; struct list_head walkers; - bool being_destroyed; }; /** -- cgit v1.2.3 From 66ee59af630fd8d5f4f56fb28162857e629aa0ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:56:46 +0100 Subject: fs: remove ki_nbytes There is no need to pass the total request length in the kiocb, as we already get passed in through the iov_iter argument. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index d9c92daa3944..132d1ecba435 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -42,7 +42,6 @@ struct kiocb { __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - size_t ki_nbytes; /* copy of iocb->aio_nbytes */ struct list_head ki_list; /* the aio core uses this * for cancellation */ -- cgit v1.2.3 From f8935983f110505daa38e8d36ee406807f83a069 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:37 -0700 Subject: clocksource: Mostly kill clocksource_register() A long running project has been to clean up remaining uses of clocksource_register(), replacing it with the simpler clocksource_register_khz/hz() functions. However, there are a few cases where we need to self-define our mult/shift values, so switch the function to a more obviously internal __clocksource_register() name, and consolidate much of the internal logic so we don't have duplication. Signed-off-by: John Stultz Cc: Dave Jones Cc: David S. Miller Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-10-git-send-email-john.stultz@linaro.org [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 16d048cadebb..bd98eaa4d005 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -179,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) } -extern int clocksource_register(struct clocksource*); extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); @@ -203,6 +202,15 @@ __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); extern void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); +/* + * Don't call this unless you are a default clocksource + * (AKA: jiffies) and absolutely have to. + */ +static inline int __clocksource_register(struct clocksource *cs) +{ + return __clocksource_register_scale(cs, 1, 0); +} + static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) { return __clocksource_register_scale(cs, 1, hz); -- cgit v1.2.3 From fba9e07208c0f9d92d9f73761c99c8612039da44 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:40 -0700 Subject: clocksource: Rename __clocksource_updatefreq_*() to __clocksource_update_freq_*() Ingo requested this function be renamed to improve readability, so I've renamed __clocksource_updatefreq_scale() as well as the __clocksource_updatefreq_hz/khz() functions to avoid squishedtogethernames. This touches some of the sh clocksources, which I've not tested. The arch/arm/plat-omap change is just a comment change for consistency. Signed-off-by: John Stultz Cc: Daniel Lezcano Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-13-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index bd98eaa4d005..135509821c39 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -200,7 +200,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); extern int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); extern void -__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); +__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq); /* * Don't call this unless you are a default clocksource @@ -221,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) return __clocksource_register_scale(cs, 1000, khz); } -static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) +static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz) { - __clocksource_updatefreq_scale(cs, 1, hz); + __clocksource_update_freq_scale(cs, 1, hz); } -static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) +static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz) { - __clocksource_updatefreq_scale(cs, 1000, khz); + __clocksource_update_freq_scale(cs, 1000, khz); } -- cgit v1.2.3 From 9c51026509d7fd11d84e0035008e1a9768960f2b Mon Sep 17 00:00:00 2001 From: Syed Asifful Dayyan Date: Fri, 6 Mar 2015 18:40:42 +0100 Subject: brcmfmac: Add support for BCM4345 SDIO chipset. These changes add support for BCM4345 SDIO chipset. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Signed-off-by: Syed Asifful Dayyan Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 996807963716..91397858dc95 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 -- cgit v1.2.3 From 2187f03a9576c4fb8342deed912b5ba6fcf98cba Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 5 Jan 2015 15:35:39 +0100 Subject: i2c: add quirk structure to describe adapter flaws The number of I2C adapters which are not fully I2C compatible is rising, sadly. Drivers usually do handle the flaws, still the user receives only some errno for a transfer which normally can be expected to work. This patch introduces a formal description of flaws. One advantage is that the core can check before the actual transfer if the messages could be transferred at all. This is done in the next patch. Another advantage is that we can pass this information to the user so the restrictions are exactly known and further actions can be based on that. This will be done later after some stabilization period for this description. Signed-off-by: Wolfram Sang Tested-by: Ray Jui Tested-by: Ivan T. Ivanov Tested-by: Neelesh Gupta Tested-By: Ludovic Desroches --- include/linux/i2c.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f17da50402a4..243d1a1d78b2 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -449,6 +449,48 @@ int i2c_recover_bus(struct i2c_adapter *adap); int i2c_generic_gpio_recovery(struct i2c_adapter *adap); int i2c_generic_scl_recovery(struct i2c_adapter *adap); +/** + * struct i2c_adapter_quirks - describe flaws of an i2c adapter + * @flags: see I2C_AQ_* for possible flags and read below + * @max_num_msgs: maximum number of messages per transfer + * @max_write_len: maximum length of a write message + * @max_read_len: maximum length of a read message + * @max_comb_1st_msg_len: maximum length of the first msg in a combined message + * @max_comb_2nd_msg_len: maximum length of the second msg in a combined message + * + * Note about combined messages: Some I2C controllers can only send one message + * per transfer, plus something called combined message or write-then-read. + * This is (usually) a small write message followed by a read message and + * barely enough to access register based devices like EEPROMs. There is a flag + * to support this mode. It implies max_num_msg = 2 and does the length checks + * with max_comb_*_len because combined message mode usually has its own + * limitations. Because of HW implementations, some controllers can actually do + * write-then-anything or other variants. To support that, write-then-read has + * been broken out into smaller bits like write-first and read-second which can + * be combined as needed. + */ + +struct i2c_adapter_quirks { + u64 flags; + int max_num_msgs; + u16 max_write_len; + u16 max_read_len; + u16 max_comb_1st_msg_len; + u16 max_comb_2nd_msg_len; +}; + +/* enforce max_num_msgs = 2 and use max_comb_*_len for length checks */ +#define I2C_AQ_COMB BIT(0) +/* first combined message must be write */ +#define I2C_AQ_COMB_WRITE_FIRST BIT(1) +/* second combined message must be read */ +#define I2C_AQ_COMB_READ_SECOND BIT(2) +/* both combined messages must have the same target address */ +#define I2C_AQ_COMB_SAME_ADDR BIT(3) +/* convenience macro for typical write-then read case */ +#define I2C_AQ_COMB_WRITE_THEN_READ (I2C_AQ_COMB | I2C_AQ_COMB_WRITE_FIRST | \ + I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR) + /* * i2c_adapter is the structure used to identify a physical i2c bus along * with the access algorithms necessary to access it. @@ -474,6 +516,7 @@ struct i2c_adapter { struct list_head userspace_clients; struct i2c_bus_recovery_info *bus_recovery_info; + const struct i2c_adapter_quirks *quirks; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit v1.2.3 From 702131e2a393b45174be326f1dbe20b658b4f157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 5 Mar 2015 18:25:10 +0100 Subject: bcma: move PCI IRQ control function to host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function isn't really related to any bus core. It touches PCI device config registers only, so move it to the (PCI) host file. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ include/linux/bcma/bcma_driver_pci.h | 2 -- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 44057b45ed32..e34f906647d3 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -437,6 +437,8 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, #ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +extern int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable); #else static inline void bcma_host_pci_up(struct bcma_bus *bus) { @@ -444,6 +446,13 @@ static inline void bcma_host_pci_up(struct bcma_bus *bus) static inline void bcma_host_pci_down(struct bcma_bus *bus) { } +static inline int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable) +{ + if (bus->hosttype == BCMA_HOSTTYPE_PCI) + return -ENOTSUPP; + return 0; +} #endif extern bool bcma_core_is_enabled(struct bcma_device *core); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 8e90004fdfd7..3a468687c170 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, - struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.2.3 From 982a40f5c0bb03368989a6b1ae833b474854e931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 5 Mar 2015 18:25:11 +0100 Subject: bcma: allow disabling (not building) PCI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It isn't required for bcma bus on SoCs, so provide some empty functions and allow disabling it. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3a468687c170..5ba6918ca20b 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,7 +238,13 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) +#ifdef CONFIG_BCMA_DRIVER_PCI extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); +#else +static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) +{ +} +#endif extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); -- cgit v1.2.3 From b62c21b71f08b7a4bfd025616ff1da2913a82904 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 12 Mar 2015 23:56:02 -0400 Subject: blk-mq: add blk_mq_init_allocated_queue and export blk_mq_register_disk Add a variant of blk_mq_init_queue that allows a previously allocated queue to be initialized. blk_mq_init_allocated_queue models blk_init_allocated_queue -- which was also created for DM's use. DM's approach to device creation requires a placeholder request_queue be allocated for use with alloc_dev() but the decision about what type of request_queue will be ultimately created is deferred until all component devices referenced in the DM table are processed to determine the table type (request-based, blk-mq request-based, or bio-based). Also, because of DM's late finalization of the request_queue type the call to blk_mq_register_disk() doesn't happen during alloc_dev(). Must export blk_mq_register_disk() so that DM can backfill the 'mq' dir once the blk-mq queue is fully allocated. Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7aec86127335..9a75c88e8908 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -164,6 +164,8 @@ enum { << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q); void blk_mq_finish_init(struct request_queue *q); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); -- cgit v1.2.3 From b94ec296403e99d5ac9a8c48332cec4118d44b94 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 11 Mar 2015 23:56:38 -0400 Subject: blk-mq: export blk_mq_run_hw_queues Rename blk_mq_run_queues to blk_mq_run_hw_queues, add async argument, and export it. DM's suspend support must be able to run the queue without starting stopped hw queues. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9a75c88e8908..ebfe707cf722 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -220,6 +220,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv); -- cgit v1.2.3 From ab330cf3888d8e0779fa05a243d53ba9f53a7ba9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 12 Mar 2015 15:35:20 +0900 Subject: usb: renesas_usbhs: add support for USB-DMAC Some Renesas SoCs have the USB-DMAC. It is able to terminate transfers when a short packet is received, even if less bytes than the transfer counter size have been received. Also, it is able to send a short packet even if the packet size is not multiples of 8bytes. Since the previous code has used the interruption of USBHS controller when receiving packets even if this driver has used a dmac, a lot of interruptions has happened. This patch will reduce such interruptions. This patch allows to use the USB-DMAC on R-Car H2 and M2. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/renesas_usbhs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 9fd9e481ea98..f06529c14141 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -165,6 +165,8 @@ struct renesas_usbhs_driver_param { */ u32 has_otg:1; /* for controlling PWEN/EXTLP */ u32 has_sudmac:1; /* for SUDMAC */ + u32 has_usb_dmac:1; /* for USB-DMAC */ +#define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; #define USBHS_TYPE_R8A7790 1 -- cgit v1.2.3 From 599bd19bdc4c6b20fd91d50f2f79dececbaf80c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Feb 2015 19:59:44 +0100 Subject: fs: don't allow to complete sync iocbs through aio_complete The AIO interface is fairly complex because it tries to allow filesystems to always work async and then wakeup a synchronous caller through aio_complete. It turns out that basically no one was doing this to avoid the complexity and context switches, and we've already fixed up the remaining users and can now get rid of this case. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 132d1ecba435..f8516430490d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -37,7 +37,6 @@ struct kiocb { union { void __user *user; - struct task_struct *tsk; } ki_obj; __u64 ki_user_data; /* user's data for completion */ @@ -63,13 +62,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) *kiocb = (struct kiocb) { .ki_ctx = NULL, .ki_filp = filp, - .ki_obj.tsk = current, }; } /* prototypes */ #ifdef CONFIG_AIO -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -77,7 +74,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } -- cgit v1.2.3 From 04b2fa9f8f36ec6fb6fd1c9dc9df6fff0cd27323 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 2 Feb 2015 14:49:06 +0100 Subject: fs: split generic and aio kiocb Most callers in the kernel want to perform synchronous file I/O, but still have to bloat the stack with a full struct kiocb. Split out the parts needed in filesystem code from those in the aio code, and only allocate those needed to pass down argument on the stack. The aio code embedds the generic iocb in the one it allocates and can easily get back to it by using container_of. Also add a ->ki_complete method to struct kiocb, this is used to call into the aio code and thus removes the dependency on aio for filesystems impementing asynchronous operations. It will also allow other callers to substitute their own completion callback. We also add a new ->ki_flags field to work around the nasty layering violation recently introduced in commit 5e33f6 ("usb: gadget: ffs: add eventfd notification about ffs events"). Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 46 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index f8516430490d..5c40b61285ac 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -14,67 +14,38 @@ struct kiocb; #define KIOCB_KEY 0 -/* - * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either - * cancelled or completed (this makes a certain amount of sense because - * successful cancellation - io_cancel() - does deliver the completion to - * userspace). - * - * And since most things don't implement kiocb cancellation and we'd really like - * kiocb completion to be lockless when possible, we use ki_cancel to - * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED - * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel(). - */ -#define KIOCB_CANCELLED ((void *) (~0ULL)) - typedef int (kiocb_cancel_fn)(struct kiocb *); +#define IOCB_EVENTFD (1 << 0) + struct kiocb { struct file *ki_filp; - struct kioctx *ki_ctx; /* NULL for sync ops */ - kiocb_cancel_fn *ki_cancel; - void *private; - - union { - void __user *user; - } ki_obj; - - __u64 ki_user_data; /* user's data for completion */ loff_t ki_pos; - - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - - /* - * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying eventfd context to deliver events to. - */ - struct eventfd_ctx *ki_eventfd; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { - return kiocb->ki_ctx == NULL; + return kiocb->ki_complete == NULL; } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_ctx = NULL, .ki_filp = filp, }; } /* prototypes */ #ifdef CONFIG_AIO -extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, @@ -84,11 +55,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -static inline struct kiocb *list_kiocb(struct list_head *h) -{ - return list_entry(h, struct kiocb, ki_list); -} - /* for sysctl: */ extern unsigned long aio_nr; extern unsigned long aio_max_nr; -- cgit v1.2.3 From 46f5cace1cd0559c335dfd4a5b8f57456d1bd6a1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 13 Mar 2015 11:09:42 -0700 Subject: usb: phy: msm: Remove dead code This code is no longer used now that mach-msm has been removed. Delete it. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: David Brown Cc: Bryan Huntsman Cc: Daniel Walker Signed-off-by: Stephen Boyd Signed-off-by: Felipe Balbi --- include/linux/usb/msm_hsusb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index b0a39243295a..7dbecf9a4656 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -117,8 +117,6 @@ struct msm_otg_platform_data { enum otg_control_type otg_control; enum msm_usb_phy_type phy_type; void (*setup_gpio)(enum usb_otg_state state); - int (*link_clk_reset)(struct clk *link_clk, bool assert); - int (*phy_clk_reset)(struct clk *phy_clk); }; /** @@ -128,7 +126,6 @@ struct msm_otg_platform_data { * @irq: IRQ number assigned for HSUSB controller. * @clk: clock struct of usb_hs_clk. * @pclk: clock struct of usb_hs_pclk. - * @phy_reset_clk: clock struct of usb_phy_clk. * @core_clk: clock struct of usb_hs_core_clk. * @regs: ioremapped register base address. * @inputs: OTG state machine inputs(Id, SessValid etc). @@ -148,7 +145,6 @@ struct msm_otg { int irq; struct clk *clk; struct clk *pclk; - struct clk *phy_reset_clk; struct clk *core_clk; void __iomem *regs; #define ID 0 -- cgit v1.2.3 From e44ea364394499d38a26ed4c9668fb378ae8797f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 12 Mar 2015 08:44:01 +0100 Subject: power_supply: Add driver private data Allow drivers to store private data inside power_supply structure for later usage in power supply operations. Usage of driver private data is necessary to access driver's state container object from power supply calls (like get_property()) if struct 'power_supply' is a stored there as a pointer, for example: struct some_driver_info { struct i2c_client *client; struct power_supply *power_supply; ... } In such case one cannot use container_of() and must store pointer to state container as private data. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Reviewed-by: Sebastian Reichel Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f606d6b4bd56..e30d85c0158d 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -209,6 +209,9 @@ struct power_supply { /* For APM emulation, think legacy userspace. */ int use_for_apm; + /* Driver private data */ + void *drv_data; + /* private */ struct device *dev; struct work_struct changed_work; @@ -285,6 +288,7 @@ extern int devm_power_supply_register_no_ws(struct device *parent, extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); +extern void *power_supply_get_drvdata(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ extern struct class *power_supply_class; -- cgit v1.2.3 From 2dc9215d7c94f7f9f34ccf8b1710ad73d82f6216 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 12 Mar 2015 08:44:02 +0100 Subject: power_supply: Move run-time configuration to separate structure Add new structure 'power_supply_config' for holding run-time initialization data like of_node, supplies and private driver data. The power_supply_register() function is changed so all power supply drivers need updating. When registering the power supply this new 'power_supply_config' should be used instead of directly initializing 'struct power_supply'. This allows changing the ownership of power_supply structure from driver to the power supply core in next patches. When a driver does not use of_node or supplies then it should use NULL as config. If driver uses of_node or supplies then it should allocate config on stack and initialize it with proper values. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Acked-by: Pavel Machek [for the nvec part] Reviewed-by: Marc Dietrich [for drivers/platform/x86/compal-laptop.c] Reviewed-by: Darren Hart [for drivers/hid/*] Reviewed-by: Jiri Kosina Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index e30d85c0158d..0d7c95f634a5 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -176,6 +176,16 @@ union power_supply_propval { struct device; struct device_node; +/* Power supply instance specific configuration */ +struct power_supply_config { + struct device_node *of_node; + /* Driver private data */ + void *drv_data; + + char **supplied_to; + size_t num_supplicants; +}; + struct power_supply { const char *name; enum power_supply_type type; @@ -278,13 +288,17 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } #endif extern int power_supply_register(struct device *parent, - struct power_supply *psy); + struct power_supply *psy, + const struct power_supply_config *cfg); extern int power_supply_register_no_ws(struct device *parent, - struct power_supply *psy); + struct power_supply *psy, + const struct power_supply_config *cfg); extern int devm_power_supply_register(struct device *parent, - struct power_supply *psy); + struct power_supply *psy, + const struct power_supply_config *cfg); extern int devm_power_supply_register_no_ws(struct device *parent, - struct power_supply *psy); + struct power_supply *psy, + const struct power_supply_config *cfg); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); -- cgit v1.2.3 From bc1540561c9ede1efb6d7bf44804676d3d02a3cc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 12 Mar 2015 08:44:03 +0100 Subject: power_supply: Add API for safe access of power supply function attrs Add simple wrappers for accessing power supply's function attributes: - get_property -> power_supply_get_property - set_property -> power_supply_set_property - property_is_writeable -> power_supply_property_is_writeable - external_power_changed -> power_supply_external_power_changed This API along with atomic usage counter adds a safe way of accessing a power supply from another driver. If power supply is unregistered after obtaining reference to it by some driver, then the API wrappers won't be executed in invalid (freed) context. Next patch changing the ownership of power supply class is still needed to fully fix race conditions in accessing freed power supply. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Reviewed-by: Sebastian Reichel Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0d7c95f634a5..7ae60346465f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -199,6 +199,12 @@ struct power_supply { size_t num_supplies; struct device_node *of_node; + /* + * Functions for drivers implementing power supply class. + * These shouldn't be called directly by other drivers for accessing + * this power supply. Instead use power_supply_*() functions (for + * example power_supply_get_property()). + */ int (*get_property)(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); @@ -227,6 +233,7 @@ struct power_supply { struct work_struct changed_work; spinlock_t changed_lock; bool changed; + atomic_t use_cnt; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; struct thermal_cooling_device *tcd; @@ -287,6 +294,15 @@ extern int power_supply_is_system_supplied(void); static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } #endif +extern int power_supply_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val); +extern int power_supply_set_property(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val); +extern int power_supply_property_is_writeable(struct power_supply *psy, + enum power_supply_property psp); +extern void power_supply_external_power_changed(struct power_supply *psy); extern int power_supply_register(struct device *parent, struct power_supply *psy, const struct power_supply_config *cfg); -- cgit v1.2.3 From 297d716f6260cc9421d971b124ca196b957ee458 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 12 Mar 2015 08:44:11 +0100 Subject: power_supply: Change ownership from driver to core Change the ownership of power_supply structure from each driver implementing the class to the power supply core. The patch changes power_supply_register() function thus all drivers implementing power supply class are adjusted. Each driver provides the implementation of power supply. However it should not be the owner of power supply class instance because it is exposed by core to other subsystems with power_supply_get_by_name(). These other subsystems have no knowledge when the driver will unregister the power supply. This leads to several issues when driver is unbound - mostly because user of power supply accesses freed memory. Instead let the core own the instance of struct 'power_supply'. Other users of this power supply will still access valid memory because it will be freed when device reference count reaches 0. Currently this means "it will leak" but power_supply_put() call in next patches will solve it. This solves invalid memory references in following race condition scenario: Thread 1: charger manager Thread 2: power supply driver, used by charger manager THREAD 1 (charger manager) THREAD 2 (power supply driver) ========================== ============================== psy = power_supply_get_by_name() Driver unbind, .remove power_supply_unregister() Device fully removed psy->get_property() The 'get_property' call is executed in invalid context because the driver was unbound and struct 'power_supply' memory was freed. This could be observed easily with charger manager driver (here compiled with max17040 fuel gauge): $ cat /sys/devices/virtual/power_supply/cm-battery/capacity & $ echo "1-0036" > /sys/bus/i2c/drivers/max17040/unbind [ 55.725123] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 55.732584] pgd = d98d4000 [ 55.734060] [00000000] *pgd=5afa2831, *pte=00000000, *ppte=00000000 [ 55.740318] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM [ 55.746210] Modules linked in: [ 55.749259] CPU: 1 PID: 2936 Comm: cat Tainted: G W 3.19.0-rc1-next-20141226-00048-gf79f475f3c44-dirty #1496 [ 55.760190] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 55.766270] task: d9b76f00 ti: daf54000 task.ti: daf54000 [ 55.771647] PC is at 0x0 [ 55.774182] LR is at charger_get_property+0x2f4/0x36c [ 55.779201] pc : [<00000000>] lr : [] psr: 60000013 [ 55.779201] sp : daf55e90 ip : 00000003 fp : 00000000 [ 55.790657] r10: 00000000 r9 : c06e2878 r8 : d9b26c68 [ 55.795865] r7 : dad81610 r6 : daec7410 r5 : daf55ebc r4 : 00000000 [ 55.802367] r3 : 00000000 r2 : daf55ebc r1 : 0000002a r0 : d9b26c68 [ 55.808879] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 55.815994] Control: 10c5387d Table: 598d406a DAC: 00000015 [ 55.821723] Process cat (pid: 2936, stack limit = 0xdaf54210) [ 55.827451] Stack: (0xdaf55e90 to 0xdaf56000) [ 55.831795] 5e80: 60000013 c01459c4 0000002a c06f8ef8 [ 55.839956] 5ea0: db651000 c06f8ef8 daebac00 c04cb668 daebac08 c0346864 00000000 c01459c4 [ 55.848115] 5ec0: d99eaa80 c06f8ef8 00000fff 00001000 db651000 c027f25c c027f240 d99eaa80 [ 55.856274] 5ee0: d9a06c00 c0146218 daf55f18 00001000 d99eaa80 db4c18c0 00000001 00000001 [ 55.864468] 5f00: daf55f80 c0144c78 c0144c54 c0107f90 00015000 d99eaab0 00000000 00000000 [ 55.872603] 5f20: 000051c7 00000000 db4c18c0 c04a9370 00015000 00001000 daf55f80 00001000 [ 55.880763] 5f40: daf54000 00015000 00000000 c00e53dc db4c18c0 c00e548c 0000000d 00008124 [ 55.888937] 5f60: 00000001 00000000 00000000 db4c18c0 db4c18c0 00001000 00015000 c00e5550 [ 55.897099] 5f80: 00000000 00000000 00001000 00001000 00015000 00000003 00000003 c000f364 [ 55.905239] 5fa0: 00000000 c000f1a0 00001000 00015000 00000003 00015000 00001000 0001333c [ 55.913399] 5fc0: 00001000 00015000 00000003 00000003 00000002 00000000 00000000 00000000 [ 55.921560] 5fe0: 7fffe000 be999850 0000a225 b6f3c19c 60000010 00000003 00000000 00000000 [ 55.929744] [] (charger_get_property) from [] (power_supply_show_property+0x48/0x20c) [ 55.939286] [] (power_supply_show_property) from [] (dev_attr_show+0x1c/0x48) [ 55.948130] [] (dev_attr_show) from [] (sysfs_kf_seq_show+0x84/0x104) [ 55.956298] [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x24/0x28) [ 55.964536] [] (kernfs_seq_show) from [] (seq_read+0x1b0/0x484) [ 55.972172] [] (seq_read) from [] (__vfs_read+0x18/0x4c) [ 55.979188] [] (__vfs_read) from [] (vfs_read+0x7c/0x100) [ 55.986304] [] (vfs_read) from [] (SyS_read+0x40/0x8c) [ 55.993164] [] (SyS_read) from [] (ret_fast_syscall+0x0/0x48) [ 56.000626] Code: bad PC value [ 56.011652] ---[ end trace 7b64343fbdae8ef1 ]--- Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz [for the nvec part] Reviewed-by: Marc Dietrich [for compal-laptop.c] Acked-by: Darren Hart [for the mfd part] Acked-by: Lee Jones [for the hid part] Acked-by: Jiri Kosina [for the acpi part] Acked-by: Rafael J. Wysocki Signed-off-by: Sebastian Reichel --- include/linux/hid.h | 6 ++-- include/linux/mfd/abx500/ux500_chargalg.h | 11 +++++-- include/linux/mfd/rt5033.h | 2 +- include/linux/mfd/wm8350/supply.h | 6 ++-- include/linux/power/charger-manager.h | 3 +- include/linux/power_supply.h | 49 +++++++++++++++++++------------ 6 files changed, 47 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index efc7787a41a8..f94cf28e4b7c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -514,10 +514,10 @@ struct hid_device { /* device report descriptor */ #ifdef CONFIG_HID_BATTERY_STRENGTH /* * Power supply information for HID devices which report - * battery strength. power_supply is registered iff - * battery.name is non-NULL. + * battery strength. power_supply was successfully registered if + * battery is non-NULL. */ - struct power_supply battery; + struct power_supply *battery; __s32 battery_min; __s32 battery_max; __s32 battery_report_type; diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h index 234c99143bf7..67703f23e7ba 100644 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ b/include/linux/mfd/abx500/ux500_chargalg.h @@ -9,8 +9,13 @@ #include -#define psy_to_ux500_charger(x) container_of((x), \ - struct ux500_charger, psy) +/* + * Valid only for supplies of type: + * - POWER_SUPPLY_TYPE_MAINS, + * - POWER_SUPPLY_TYPE_USB, + * because only them store as drv_data pointer to struct ux500_charger. + */ +#define psy_to_ux500_charger(x) power_supply_get_drvdata(psy) /* Forward declaration */ struct ux500_charger; @@ -35,7 +40,7 @@ struct ux500_charger_ops { * @power_path USB power path support */ struct ux500_charger { - struct power_supply psy; + struct power_supply *psy; struct ux500_charger_ops ops; int max_out_volt; int max_out_curr; diff --git a/include/linux/mfd/rt5033.h b/include/linux/mfd/rt5033.h index 010cff49a98e..6cff5cf458d2 100644 --- a/include/linux/mfd/rt5033.h +++ b/include/linux/mfd/rt5033.h @@ -39,7 +39,7 @@ struct rt5033_battery { struct i2c_client *client; struct rt5033_dev *rt5033; struct regmap *regmap; - struct power_supply psy; + struct power_supply *psy; }; /* RT5033 charger platform data */ diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h index 2b9479310bbd..8dc93673e34a 100644 --- a/include/linux/mfd/wm8350/supply.h +++ b/include/linux/mfd/wm8350/supply.h @@ -123,9 +123,9 @@ struct wm8350_charger_policy { struct wm8350_power { struct platform_device *pdev; - struct power_supply battery; - struct power_supply usb; - struct power_supply ac; + struct power_supply *battery; + struct power_supply *usb; + struct power_supply *ac; struct wm8350_charger_policy *policy; int rev_g_coeff; diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index 416ebeb6ee1e..eadf28cb2fc9 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -242,7 +242,8 @@ struct charger_manager { int emergency_stop; char psy_name_buf[PSY_NAME_MAX + 1]; - struct power_supply charger_psy; + struct power_supply_desc charger_psy_desc; + struct power_supply *charger_psy; u64 charging_start_time; u64 charging_end_time; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 7ae60346465f..ea15eb68f609 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -13,6 +13,7 @@ #ifndef __LINUX_POWER_SUPPLY_H__ #define __LINUX_POWER_SUPPLY_H__ +#include #include #include #include @@ -173,10 +174,10 @@ union power_supply_propval { const char *strval; }; -struct device; struct device_node; +struct power_supply; -/* Power supply instance specific configuration */ +/* Run-time specific power supply configuration */ struct power_supply_config { struct device_node *of_node; /* Driver private data */ @@ -186,19 +187,13 @@ struct power_supply_config { size_t num_supplicants; }; -struct power_supply { +/* Description of power supply */ +struct power_supply_desc { const char *name; enum power_supply_type type; enum power_supply_property *properties; size_t num_properties; - char **supplied_to; - size_t num_supplicants; - - char **supplied_from; - size_t num_supplies; - struct device_node *of_node; - /* * Functions for drivers implementing power supply class. * These shouldn't be called directly by other drivers for accessing @@ -224,12 +219,23 @@ struct power_supply { bool no_thermal; /* For APM emulation, think legacy userspace. */ int use_for_apm; +}; + +struct power_supply { + const struct power_supply_desc *desc; + + char **supplied_to; + size_t num_supplicants; + + char **supplied_from; + size_t num_supplies; + struct device_node *of_node; /* Driver private data */ void *drv_data; /* private */ - struct device *dev; + struct device dev; struct work_struct changed_work; spinlock_t changed_lock; bool changed; @@ -303,17 +309,22 @@ extern int power_supply_set_property(struct power_supply *psy, extern int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp); extern void power_supply_external_power_changed(struct power_supply *psy); -extern int power_supply_register(struct device *parent, - struct power_supply *psy, + +extern struct power_supply *__must_check +power_supply_register(struct device *parent, + const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern int power_supply_register_no_ws(struct device *parent, - struct power_supply *psy, +extern struct power_supply *__must_check +power_supply_register_no_ws(struct device *parent, + const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern int devm_power_supply_register(struct device *parent, - struct power_supply *psy, +extern struct power_supply *__must_check +devm_power_supply_register(struct device *parent, + const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern int devm_power_supply_register_no_ws(struct device *parent, - struct power_supply *psy, +extern struct power_supply *__must_check +devm_power_supply_register_no_ws(struct device *parent, + const struct power_supply_desc *desc, const struct power_supply_config *cfg); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); -- cgit v1.2.3 From 1a352462b5377ac68f5955d674b3460c7bac52a3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 12 Mar 2015 08:44:12 +0100 Subject: power_supply: Add power_supply_put for decrementing device reference counter The power_supply_get_by_phandle() and power_supply_get_by_name() use function class_find_device() for obtaining the reference to power supply. Each use of class_find_device() increases the power supply's device reference counter. However the reference counter was not decreased by users of this API. Thus final device_unregister() call from power_supply_unregister() could not release the device and clean up its resources. This lead to memory leak if at least once power_supply_get_by_*() was called between registering and unregistering the power supply. Add and document new API power_supply_put() for decrementing the reference counter. Signed-off-by: Krzysztof Kozlowski Acked-by: Pavel Machek Reviewed-by: Bartlomiej Zolnierkiewicz Reviewed-by: Sebastian Reichel Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index ea15eb68f609..75a1dd8dc56e 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -282,6 +282,7 @@ extern struct atomic_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); extern struct power_supply *power_supply_get_by_name(const char *name); +extern void power_supply_put(struct power_supply *psy); #ifdef CONFIG_OF extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, const char *property); -- cgit v1.2.3 From 3f3c4bb5ec7c645d1151e1e8d6e56c71a050cf85 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 4 Mar 2015 21:19:59 +0100 Subject: mac802154: correct max sifs size handling This patch fix the max sifs size correction when the IEEE802154_HW_TX_OMIT_CKSUM flag is set. With this flag the sk_buff doesn't contain the CRC, because the transceiver will add the CRC while transmit. Also add some defines for the max sifs frame size value and frame check sequence according to 802.15.4 standard. Signed-off-by: Alexander Aring Acked-by: Marc Kleine-Budde Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 40b0ab953937..8872ca103d06 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -30,6 +30,7 @@ #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 +#define IEEE802154_FCS_LEN 2 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -39,6 +40,7 @@ #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 +#define IEEE802154_MAX_SIFS_FRAME_SIZE 18 #define IEEE802154_MAX_CHANNEL 26 #define IEEE802154_MAX_PAGE 31 -- cgit v1.2.3 From 08b55e2a9208e4841a17c9d9c2c454986392977d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Mar 2015 15:43:43 +0000 Subject: genirq: Add irqchip_set_wake_parent This proves to be useful with stacked domains, when the current domain doesn't implement wake-up, but expect the parent to do so. Acked-by: Tony Lindgren Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1426088629-15377-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d09ec7a1243e..3057c48e4933 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -460,6 +460,7 @@ extern void irq_chip_eoi_parent(struct irq_data *data); extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); +extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); #endif /* Handling of unhandled and spurious interrupts: */ -- cgit v1.2.3 From 783d31863fb826f1a3754a2d5959a022a1b12d54 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Mar 2015 15:43:44 +0000 Subject: irqchip: crossbar: Convert dra7 crossbar to stacked domains Support for the TI crossbar used on the DRA7 family of chips is implemented as an ugly hack on the side of the GIC. Converting it to stacked domains makes it slightly more palatable, as it results in a cleanup. Unfortunately, as the DT bindings failed to acknowledge the fact that this is actually yet another interrupt controller (the third, actually), we have yet another breakage. Oh well. Acked-by: Tony Lindgren Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1426088629-15377-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqchip/irq-crossbar.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/linux/irqchip/irq-crossbar.h (limited to 'include/linux') diff --git a/include/linux/irqchip/irq-crossbar.h b/include/linux/irqchip/irq-crossbar.h deleted file mode 100644 index e5537b81df8d..000000000000 --- a/include/linux/irqchip/irq-crossbar.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * drivers/irqchip/irq-crossbar.h - * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -int irqcrossbar_init(void); -- cgit v1.2.3 From a5561c3e845cae41ae40c15689a7f26e90b000c8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Mar 2015 15:43:46 +0000 Subject: irqchip: gic: Get rid of routable domain The only user of the so called "routable domain" functionality now being fixed, let's clean up the GIC. Acked-by: Tony Lindgren Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1426088629-15377-5-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqchip/arm-gic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 71d706d5f169..3978c5be4edc 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -115,11 +115,5 @@ int gic_get_cpu_id(unsigned int cpu); void gic_migrate_target(unsigned int new_cpu_id); unsigned long gic_get_sgir_physaddr(void); -extern const struct irq_domain_ops *gic_routable_irq_domain_ops; -static inline void __init register_routable_domain_ops - (const struct irq_domain_ops *ops) -{ - gic_routable_irq_domain_ops = ops; -} #endif /* __ASSEMBLY */ #endif -- cgit v1.2.3 From 49869be2d9d0e9195706da7050c81bc909f41f4f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Mar 2015 15:45:34 +0000 Subject: irqchip: gic: Add an entry point to set up irqchip flags A common use of gic_arch_extn is to set up additional flags to the GIC irqchip. It looks like a benign enough hack that doesn't really require the users of that feature to be converted to stacked domains. Add a gic_set_irqchip_flags() function that platform code can call instead of using the dreaded gic_arch_extn. Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1426088737-15817-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqchip/arm-gic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 71d706d5f169..3bca864fd6fc 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -97,6 +97,7 @@ struct device_node; extern struct irq_chip gic_arch_extn; +void gic_set_irqchip_flags(unsigned long flags); void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -- cgit v1.2.3 From eddee5ba34eb6c9890ef106f19ead2b370e5342f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:20 +1100 Subject: rhashtable: Fix walker behaviour during rehash Previously whenever the walker encountered a resize it simply snaps back to the beginning and starts again. However, this only works if the rehash started and completed while the walker was idle. If the walker attempts to restart while the rehash is still ongoing, we may miss objects that we shouldn't have. This patch fixes this by making the walker walk the old table followed by the new table just like all other readers. If a rehash is detected we will still signal our caller of the fact so they can prepare for duplicates but we will simply continue the walk onto the new table after the old one is finished either by us or by the rehasher. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c93ff8ac474a..4192682c1d5c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -53,6 +53,7 @@ struct rhash_head { * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets + * @walkers: List of active walkers * @buckets: size * hash buckets */ struct bucket_table { @@ -61,6 +62,7 @@ struct bucket_table { u32 shift; unsigned int locks_mask; spinlock_t *locks; + struct list_head walkers; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -104,7 +106,6 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping - * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -115,17 +116,16 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; - struct list_head walkers; }; /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers - * @resize: Resize event occured + * @tbl: The table that we were walking over */ struct rhashtable_walker { struct list_head list; - bool resize; + struct bucket_table *tbl; }; /** -- cgit v1.2.3 From 9d901bc05153bbf33b5da2cd6266865e531f0545 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:23 +1100 Subject: rhashtable: Free bucket tables asynchronously after rehash There is in fact no need to wait for an RCU grace period in the rehash function, since all insertions are guaranteed to go into the new table through spin locks. This patch uses call_rcu to free the old/rehashed table at our leisure. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4192682c1d5c..a0abddd226b3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -54,6 +54,7 @@ struct rhash_head { * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers + * @rcu: RCU structure for freeing the table * @buckets: size * hash buckets */ struct bucket_table { @@ -63,6 +64,7 @@ struct bucket_table { unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; + struct rcu_head rcu; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From 63d512d0cffcae40505d9448abd509972465e846 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:24 +1100 Subject: rhashtable: Add rehash counter to bucket_table This patch adds a rehash counter to bucket_table to indicate the last bucket that has been rehashed. This serves two purposes: 1. Any bucket that has been rehashed can never gain a new object. 2. If the rehash counter reaches the size of the table, the table will forever remain empty. This patch also downsizes bucket_table->size to an unsigned int since we do not support sizes greater than 32 bits yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a0abddd226b3..ed7562ad4ca0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,6 +49,7 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] @@ -58,7 +59,8 @@ struct rhash_head { * @buckets: size * hash buckets */ struct bucket_table { - size_t size; + unsigned int size; + unsigned int rehash; u32 hash_rnd; u32 shift; unsigned int locks_mask; -- cgit v1.2.3 From c4db8848af6af92f90462258603be844baeab44d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:25 +1100 Subject: rhashtable: Move future_tbl into struct bucket_table This patch moves future_tbl to open up the possibility of having multiple rehashes on the same table. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ed7562ad4ca0..1695378b3c5b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -56,6 +56,7 @@ struct rhash_head { * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table + * @future_tbl: Table under construction during rehashing * @buckets: size * hash buckets */ struct bucket_table { @@ -68,6 +69,8 @@ struct bucket_table { struct list_head walkers; struct rcu_head rcu; + struct bucket_table __rcu *future_tbl; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -105,7 +108,6 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously @@ -114,7 +116,6 @@ struct rhashtable_params { */ struct rhashtable { struct bucket_table __rcu *tbl; - struct bucket_table __rcu *future_tbl; atomic_t nelems; bool being_destroyed; struct rhashtable_params p; -- cgit v1.2.3 From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:16 +0100 Subject: ebpf: add prandom helper for packet sampling This work is similar to commit 4cd3675ebf74 ("filter: added BPF random opcode") and adds a possibility for packet sampling in eBPF. Currently, this is only possible in classic BPF and useful to combine sampling with f.e. packet sockets, possible also with tc. Example function proto-type looks like: u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80f2e0fc3d02..50bf95e29a96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_get_prandom_u32_proto; + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:17 +0100 Subject: ebpf: add helper for obtaining current processor id This patch adds the possibility to obtain raw_smp_processor_id() in eBPF. Currently, this is only possible in classic BPF where commit da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added facilities for this. Perhaps most importantly, this would also allow us to track per CPU statistics with eBPF maps, or to implement a poor-man's per CPU data structure through eBPF maps. Example function proto-type looks like: u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50bf95e29a96..30bfd331882a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; +extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 13 Mar 2015 11:57:42 -0700 Subject: bpf: allow extended BPF programs access skb fields introduce user accessible mirror of in-kernel 'struct sk_buff': struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs can do: int bpf_prog(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since skb->pkt_type is a bitfield. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30bfd331882a..280a315de8d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,6 +103,9 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + + u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn); }; struct bpf_prog_type_list { @@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { -- cgit v1.2.3 From 4170604feec780d00e7511c24fa0f6e5c2e4ed75 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:14 -0700 Subject: switchdev: add swdev ops As discussed at netconf, introduce swdev_ops as first step to move switchdev ops from ndo to swdev. This will keep switchdev from cluttering up ndo ops space. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddab1a2a07a0..9e8a2a933c68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1577,6 +1577,9 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; +#ifdef CONFIG_NET_SWITCHDEV + const struct swdev_ops *swdev_ops; +#endif const struct header_ops *header_ops; -- cgit v1.2.3 From 812a1c3ff3ee9d5100e0e71edb06681014e84a9b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:16 -0700 Subject: netdev: remove ndo ops for switchdev Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e8a2a933c68..dd1d069758be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,8 +768,6 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -struct fib_info; - /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1024,23 +1022,6 @@ struct fib_info; * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. - * - * int (*ndo_switch_parent_id_get)(struct net_device *dev, - * struct netdev_phys_item_id *psid); - * Called to get an ID of the switch chip this port is part of. - * If driver implements this, it indicates that it represents a port - * of a switch chip. - * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); - * Called to notify switch device port of bridge port STP - * state change. - * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 nlflags, u32 tb_id); - * Called to add/modify IPv4 route to switch device. - * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); - * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1197,25 +1178,6 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); -#ifdef CONFIG_NET_SWITCHDEV - int (*ndo_switch_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*ndo_switch_port_stp_update)(struct net_device *dev, - u8 state); - int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, - u32 tb_id); - int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 tb_id); -#endif }; /** -- cgit v1.2.3 From 4d9b519c9bcab5718053f8717dadad7b09b41f5e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Mar 2015 14:00:02 -0700 Subject: hwrng: add devm_* interfaces This change adds devm_hwrng_register and devm_hwrng_unregister which use can simplify error unwinding and unbinding code paths in device drivers. Signed-off-by: Dmitry Torokhov Signed-off-by: Herbert Xu --- include/linux/hw_random.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index eb7b414d232b..4f7d8f4b1e9a 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -50,10 +50,14 @@ struct hwrng { struct completion cleanup_done; }; +struct device; + /** Register a new Hardware Random Number Generator driver. */ extern int hwrng_register(struct hwrng *rng); +extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); /** Unregister a Hardware Random Number Generator driver. */ extern void hwrng_unregister(struct hwrng *rng); +extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); /** Feed random bits into the pool. */ extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); -- cgit v1.2.3 From c055d5b03bb4cb69d349d787c9787c0383abd8b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 05:08:19 +0100 Subject: netfilter: bridge: query conntrack about skb dnat ask conntrack instead of storing ipv4 address in nf_bridge_info->data. Ths avoids the need to use ->data during NF_PRE_ROUTING. Only two functions that need ->data remain. These will be addressed in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index bb39113ea596..de123d769ffc 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -54,12 +54,6 @@ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) return 0; } -struct bridge_skb_cb { - union { - __be32 ipv4; - } daddr; -}; - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); -- cgit v1.2.3 From e4bb9bcbfb7d67431dfd49860f62770a7f40193b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 10:36:48 +0100 Subject: netfilter: bridge: remove BRNF_STATE_BRIDGED flag Its not needed anymore since 2bf540b73ed5b ([NETFILTER]: bridge-netfilter: remove deferred hooks). Before this it was possible to have physoutdev set for locally generated packets -- this isn't the case anymore: BRNF_STATE_BRIDGED flag is set when we assign nf_bridge->physoutdev, so physoutdev != NULL means BRNF_STATE_BRIDGED is set. If physoutdev is NULL, then we are looking at locally-delivered and routed packet. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index de123d769ffc..ed0d3bf953c3 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -19,7 +19,6 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_BRIDGED 0x04 #define BRNF_NF_BRIDGE_PREROUTING 0x08 #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -- cgit v1.2.3 From 1aa15f4e00c9be2c7e4c3bd36886ed65d25cc075 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 12 Mar 2015 13:07:26 +0100 Subject: mfd: syscon: Add atmel system timer registers definition AT91RM920 has a memory range reserved for timer and watchdog configuration. Expose those registers so that drivers can make use of the system timer syscon declared in at91 DTs. Signed-off-by: Alexandre Belloni Acked-by: Lee Jones Signed-off-by: Nicolas Ferre --- include/linux/mfd/syscon/atmel-st.h | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/linux/mfd/syscon/atmel-st.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/atmel-st.h b/include/linux/mfd/syscon/atmel-st.h new file mode 100644 index 000000000000..8acf1ec1fa32 --- /dev/null +++ b/include/linux/mfd/syscon/atmel-st.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * System Timer (ST) - System peripherals registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _LINUX_MFD_SYSCON_ATMEL_ST_H +#define _LINUX_MFD_SYSCON_ATMEL_ST_H + +#include + +#define AT91_ST_CR 0x00 /* Control Register */ +#define AT91_ST_WDRST BIT(0) /* Watchdog Timer Restart */ + +#define AT91_ST_PIMR 0x04 /* Period Interval Mode Register */ +#define AT91_ST_PIV 0xffff /* Period Interval Value */ + +#define AT91_ST_WDMR 0x08 /* Watchdog Mode Register */ +#define AT91_ST_WDV 0xffff /* Watchdog Counter Value */ +#define AT91_ST_RSTEN BIT(16) /* Reset Enable */ +#define AT91_ST_EXTEN BIT(17) /* External Signal Assertion Enable */ + +#define AT91_ST_RTMR 0x0c /* Real-time Mode Register */ +#define AT91_ST_RTPRES 0xffff /* Real-time Prescalar Value */ + +#define AT91_ST_SR 0x10 /* Status Register */ +#define AT91_ST_PITS BIT(0) /* Period Interval Timer Status */ +#define AT91_ST_WDOVF BIT(1) /* Watchdog Overflow */ +#define AT91_ST_RTTINC BIT(2) /* Real-time Timer Increment */ +#define AT91_ST_ALMS BIT(3) /* Alarm Status */ + +#define AT91_ST_IER 0x14 /* Interrupt Enable Register */ +#define AT91_ST_IDR 0x18 /* Interrupt Disable Register */ +#define AT91_ST_IMR 0x1c /* Interrupt Mask Register */ + +#define AT91_ST_RTAR 0x20 /* Real-time Alarm Register */ +#define AT91_ST_ALMV 0xfffff /* Alarm Value */ + +#define AT91_ST_CRTR 0x24 /* Current Real-time Register */ +#define AT91_ST_CRTV 0xfffff /* Current Real-Time Value */ + +#endif /* _LINUX_MFD_SYSCON_ATMEL_ST_H */ -- cgit v1.2.3 From 3b62286d0ef785815994e2558e8cfb686597b0cd Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 16 Mar 2015 09:37:24 +0200 Subject: dmaengine: Remove FSF mailing addresses Free Software Foundation mailing address has been moved in the past and some of the addresses here are outdated. Remove them from file headers since the COPYING file in the kernel sources includes it. Signed-off-by: Jarkko Nikula Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2bff9abc162a..ad419757241f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -11,10 +11,6 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * The full GNU General Public License is included in this distribution in the * file called COPYING. */ -- cgit v1.2.3 From 7e992d692750b2938224eb43fee907181d92a602 Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Mon, 16 Mar 2015 14:08:54 -0600 Subject: vfio: move eventfd support code for VFIO_PCI to a separate file The virqfd functionality that is used by VFIO_PCI to implement interrupt masking and unmasking via an eventfd, is generic enough and can be reused by another driver. Move it to a separate file in order to allow the code to be shared. Signed-off-by: Antonios Motakis Signed-off-by: Baptiste Reynal Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson --- include/linux/vfio.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2d67b8998fd8..683b5146022e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -14,6 +14,8 @@ #include #include +#include +#include #include /** @@ -123,4 +125,29 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return -ENOTTY; } #endif /* CONFIG_EEH */ + +/* + * IRQfd - generic + */ +struct virqfd { + void *opaque; + struct eventfd_ctx *eventfd; + int (*handler)(void *, void *); + void (*thread)(void *, void *); + void *data; + struct work_struct inject; + wait_queue_t wait; + poll_table pt; + struct work_struct shutdown; + struct virqfd **pvirqfd; +}; + +extern int vfio_virqfd_init(void); +extern void vfio_virqfd_exit(void); +extern int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd); +extern void vfio_virqfd_disable(struct virqfd **pvirqfd); + #endif /* VFIO_H */ -- cgit v1.2.3 From 34644524bce91883d5051a7eaf3ec5464ed149bf Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Fri, 6 Feb 2015 19:15:27 +0530 Subject: lib: devres: add a helper function for ioremap_wc Implement a resource managed writecombine ioremap function. Signed-off-by: Abhilash Kesavan Acked-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- include/linux/io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index fa02e55e5a2e..42b33f03d1df 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -64,6 +64,8 @@ void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, resource_size_t size); +void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, + resource_size_t size); void devm_iounmap(struct device *dev, void __iomem *addr); int check_signature(const volatile void __iomem *io_addr, const unsigned char *signature, int length); -- cgit v1.2.3 From ce793486e23e0162a732c605189c8028e0910e86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2015 23:49:03 +0100 Subject: driver core / ACPI: Represent ACPI companions using fwnode_handle Now that we have struct fwnode_handle, we can use that to point to ACPI companions from struct device objects instead of pointing to struct acpi_device directly. There are two benefits from that. First, the somewhat ugly and hackish struct acpi_dev_node can be dropped and, second, the same struct fwnode_handle pointer can be used in the future to point to other (non-ACPI) firmware device node types. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Grant Likely --- include/linux/acpi.h | 5 +++-- include/linux/device.h | 13 +++---------- include/linux/fwnode.h | 25 +++++++++++++++++++++++++ include/linux/i2c.h | 4 ++-- include/linux/platform_device.h | 2 +- include/linux/property.h | 11 +---------- 6 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 include/linux/fwnode.h (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 24c7aa8b1d20..402ddbdc2da1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -53,8 +53,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) ((dev)->acpi_node.companion) -#define ACPI_COMPANION_SET(dev, adev) ACPI_COMPANION(dev) = (adev) +#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode) +#define ACPI_COMPANION_SET(dev, adev) (dev)->fwnode = (adev) ? \ + acpi_fwnode_handle(adev) : NULL #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) static inline void acpi_preset_companion(struct device *dev, diff --git a/include/linux/device.h b/include/linux/device.h index 0eb8ee2dc6d1..badef20b876a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -38,6 +38,7 @@ struct class; struct subsys_private; struct bus_type; struct device_node; +struct fwnode_handle; struct iommu_ops; struct iommu_group; @@ -650,14 +651,6 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; -struct acpi_device; - -struct acpi_dev_node { -#ifdef CONFIG_ACPI - struct acpi_device *companion; -#endif -}; - /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. @@ -703,7 +696,7 @@ struct acpi_dev_node { * @cma_area: Contiguous memory area for dma allocations * @archdata: For arch-specific additions. * @of_node: Associated device tree node. - * @acpi_node: Associated ACPI device node. + * @fwnode: Associated device node supplied by platform firmware. * @devt: For creating the sysfs "dev". * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. @@ -779,7 +772,7 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - struct acpi_dev_node acpi_node; /* associated ACPI device node */ + struct fwnode_handle *fwnode; /* firmware device node */ dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h new file mode 100644 index 000000000000..17bb5f039509 --- /dev/null +++ b/include/linux/fwnode.h @@ -0,0 +1,25 @@ +/* + * fwnode.h - Firmware device node object handle type definition. + * + * Copyright (C) 2015, Intel Corporation + * Author: Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_FWNODE_H_ +#define _LINUX_FWNODE_H_ + +enum fwnode_type { + FWNODE_INVALID = 0, + FWNODE_OF, + FWNODE_ACPI, +}; + +struct fwnode_handle { + enum fwnode_type type; +}; + +#endif diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f17da50402a4..6d89575361a8 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -278,7 +278,7 @@ static inline int i2c_slave_event(struct i2c_client *client, * @platform_data: stored in i2c_client.dev.platform_data * @archdata: copied into i2c_client.dev.archdata * @of_node: pointer to OpenFirmware device node - * @acpi_node: ACPI device node + * @fwnode: device node supplied by the platform firmware * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and @@ -299,7 +299,7 @@ struct i2c_board_info { void *platform_data; struct dev_archdata *archdata; struct device_node *of_node; - struct acpi_dev_node acpi_node; + struct fwnode_handle *fwnode; int irq; }; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index ae4882ca4a64..58f1e75ba105 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -59,7 +59,7 @@ extern int platform_add_devices(struct platform_device **, int); struct platform_device_info { struct device *parent; - struct acpi_dev_node acpi_node; + struct fwnode_handle *fwnode; const char *name; int id; diff --git a/include/linux/property.h b/include/linux/property.h index a6a3d98bd7e9..31dfd3db35d6 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -13,6 +13,7 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include #include struct device; @@ -40,16 +41,6 @@ int device_property_read_string_array(struct device *dev, const char *propname, int device_property_read_string(struct device *dev, const char *propname, const char **val); -enum fwnode_type { - FWNODE_INVALID = 0, - FWNODE_OF, - FWNODE_ACPI, -}; - -struct fwnode_handle { - enum fwnode_type type; -}; - bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, const char *propname, u8 *val, -- cgit v1.2.3 From ca5b74d2675a44f54aacb919c1cf022463e2f738 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 16 Mar 2015 23:49:08 +0100 Subject: ACPI: Introduce has_acpi_companion() Now that the ACPI companions of devices are represented by pointers to struct fwnode_handle, it is not quite efficient to check whether or not an ACPI companion of a device is present by evaluating the ACPI_COMPANION() macro. For this reason, introduce a special static inline routine for that, has_acpi_companion(), and update the code to use it where applicable. Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 402ddbdc2da1..ec488d03b518 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,11 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +static inline bool has_acpi_companion(struct device *dev) +{ + return is_acpi_node(dev->fwnode); +} + static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { @@ -472,6 +477,11 @@ static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) return NULL; } +static inline bool has_acpi_companion(struct device *dev) +{ + return false; +} + static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; -- cgit v1.2.3 From 3876488444e71238e287459c39d7692b6f718c3e Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 9 Mar 2015 15:52:17 +0100 Subject: include/stddef.h: Move offsetofend() from vfio.h to a generic kernel header Suggested by Andy. Suggested-by: Andy Lutomirski Signed-off-by: Denys Vlasenko Acked-by: Linus Torvalds Cc: Alexei Starovoitov Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Will Drewry Link: http://lkml.kernel.org/r/1425912738-559-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stddef.h | 9 +++++++++ include/linux/vfio.h | 13 ------------- 2 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index f4aec0e75c3a..076af437284d 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -19,3 +19,12 @@ enum { #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif #endif + +/** + * offsetofend(TYPE, MEMBER) + * + * @TYPE: The type of the structure + * @MEMBER: The member within the structure to get the end offset of + */ +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2d67b8998fd8..049b2f497bc7 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -78,19 +78,6 @@ extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); extern void vfio_unregister_iommu_driver( const struct vfio_iommu_driver_ops *ops); -/** - * offsetofend(TYPE, MEMBER) - * - * @TYPE: The type of the structure - * @MEMBER: The member within the structure to get the end offset of - * - * Simple helper macro for dealing with variable sized structures passed - * from user space. This allows us to easily determine if the provided - * structure is sized to include various fields. - */ -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) - /* * External user API */ -- cgit v1.2.3 From 937abe88aea3161cd3a563e577fc9cf4522c7aad Mon Sep 17 00:00:00 2001 From: Kedareswara rao Appana Date: Mon, 2 Mar 2015 23:24:24 +0530 Subject: dmaengine: xilinx-dma: move header file to common location This patch moves the xilinx_dma.h header file to the include/linux/dma. Signed-off-by: Kedareswara rao Appana Signed-off-by: Vinod Koul --- include/linux/amba/xilinx_dma.h | 47 ----------------------------------------- include/linux/dma/xilinx_dma.h | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 47 deletions(-) delete mode 100644 include/linux/amba/xilinx_dma.h create mode 100644 include/linux/dma/xilinx_dma.h (limited to 'include/linux') diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h deleted file mode 100644 index 34b98f276ed0..000000000000 --- a/include/linux/amba/xilinx_dma.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Xilinx DMA Engine drivers support header file - * - * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved. - * - * This is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __DMA_XILINX_DMA_H -#define __DMA_XILINX_DMA_H - -#include -#include - -/** - * struct xilinx_vdma_config - VDMA Configuration structure - * @frm_dly: Frame delay - * @gen_lock: Whether in gen-lock mode - * @master: Master that it syncs to - * @frm_cnt_en: Enable frame count enable - * @park: Whether wants to park - * @park_frm: Frame to park on - * @coalesc: Interrupt coalescing threshold - * @delay: Delay counter - * @reset: Reset Channel - * @ext_fsync: External Frame Sync source - */ -struct xilinx_vdma_config { - int frm_dly; - int gen_lock; - int master; - int frm_cnt_en; - int park; - int park_frm; - int coalesc; - int delay; - int reset; - int ext_fsync; -}; - -int xilinx_vdma_channel_set_config(struct dma_chan *dchan, - struct xilinx_vdma_config *cfg); - -#endif diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h new file mode 100644 index 000000000000..34b98f276ed0 --- /dev/null +++ b/include/linux/dma/xilinx_dma.h @@ -0,0 +1,47 @@ +/* + * Xilinx DMA Engine drivers support header file + * + * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DMA_XILINX_DMA_H +#define __DMA_XILINX_DMA_H + +#include +#include + +/** + * struct xilinx_vdma_config - VDMA Configuration structure + * @frm_dly: Frame delay + * @gen_lock: Whether in gen-lock mode + * @master: Master that it syncs to + * @frm_cnt_en: Enable frame count enable + * @park: Whether wants to park + * @park_frm: Frame to park on + * @coalesc: Interrupt coalescing threshold + * @delay: Delay counter + * @reset: Reset Channel + * @ext_fsync: External Frame Sync source + */ +struct xilinx_vdma_config { + int frm_dly; + int gen_lock; + int master; + int frm_cnt_en; + int park; + int park_frm; + int coalesc; + int delay; + int reset; + int ext_fsync; +}; + +int xilinx_vdma_channel_set_config(struct dma_chan *dchan, + struct xilinx_vdma_config *cfg); + +#endif -- cgit v1.2.3 From e999c7289cf2e542e8be8bc72ba5dc0f8f06c88e Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Mar 2015 15:56:05 +0530 Subject: regulator: palmas: Add has_regen3 check for TPS659038 Palmas driver is used to cater to even TPS659038 but TPS659038 does not have REGEN3 resource. Adding another field in the driver data to check on that. Signed-off-by: Keerthy Signed-off-by: Mark Brown --- include/linux/mfd/palmas.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index ee7b1ce7a6f8..bb270bd03eed 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -117,6 +117,7 @@ struct palmas_pmic_driver_data { int ldo_begin; int ldo_end; int max_reg; + bool has_regen3; struct palmas_regs_info *palmas_regs_info; struct of_regulator_match *palmas_matches; struct palmas_sleep_requestor_info *sleep_req_info; -- cgit v1.2.3 From 36111da7838e186069ed1ec4fe2fe7510e81da55 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Mar 2015 16:48:51 +0200 Subject: dmaengine: intel-mid-dma: remove the driver Since the last and the only user of this driver is converted to use dw_dmac we can remove driver from the tree. Moreover, besides the driver is unmaintained a long time, it serves for the DesignWare DMA IP, for which we have already driver in the tree. Signed-off-by: Andy Shevchenko Acked-by: Vinod Koul Signed-off-by: Mark Brown --- include/linux/intel_mid_dma.h | 76 ------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 include/linux/intel_mid_dma.h (limited to 'include/linux') diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h deleted file mode 100644 index 10496bd24c5c..000000000000 --- a/include/linux/intel_mid_dma.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * intel_mid_dma.h - Intel MID DMA Drivers - * - * Copyright (C) 2008-10 Intel Corp - * Author: Vinod Koul - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * - */ -#ifndef __INTEL_MID_DMA_H__ -#define __INTEL_MID_DMA_H__ - -#include - -#define DMA_PREP_CIRCULAR_LIST (1 << 10) - -/*DMA mode configurations*/ -enum intel_mid_dma_mode { - LNW_DMA_PER_TO_MEM = 0, /*periphral to memory configuration*/ - LNW_DMA_MEM_TO_PER, /*memory to periphral configuration*/ - LNW_DMA_MEM_TO_MEM, /*mem to mem confg (testing only)*/ -}; - -/*DMA handshaking*/ -enum intel_mid_dma_hs_mode { - LNW_DMA_HW_HS = 0, /*HW Handshaking only*/ - LNW_DMA_SW_HS = 1, /*SW Handshaking not recommended*/ -}; - -/*Burst size configuration*/ -enum intel_mid_dma_msize { - LNW_DMA_MSIZE_1 = 0x0, - LNW_DMA_MSIZE_4 = 0x1, - LNW_DMA_MSIZE_8 = 0x2, - LNW_DMA_MSIZE_16 = 0x3, - LNW_DMA_MSIZE_32 = 0x4, - LNW_DMA_MSIZE_64 = 0x5, -}; - -/** - * struct intel_mid_dma_slave - DMA slave structure - * - * @dirn: DMA trf direction - * @src_width: tx register width - * @dst_width: rx register width - * @hs_mode: HW/SW handshaking mode - * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem) - * @src_msize: Source DMA burst size - * @dst_msize: Dst DMA burst size - * @per_addr: Periphral address - * @device_instance: DMA peripheral device instance, we can have multiple - * peripheral device connected to single DMAC - */ -struct intel_mid_dma_slave { - enum intel_mid_dma_hs_mode hs_mode; /*handshaking*/ - enum intel_mid_dma_mode cfg_mode; /*mode configuration*/ - unsigned int device_instance; /*0, 1 for periphral instance*/ - struct dma_slave_config dma_slave; -}; - -#endif /*__INTEL_MID_DMA_H__*/ -- cgit v1.2.3 From 71be3423a62be548c56bab5b818e1a1383e659d2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 17 Mar 2015 08:33:38 -0600 Subject: vfio: Split virqfd into a separate module for vfio bus drivers An unintended consequence of commit 42ac9bd18d4f ("vfio: initialize the virqfd workqueue in VFIO generic code") is that the vfio module is renamed to vfio_core so that it can include both vfio and virqfd. That's a user visible change that may break module loading scritps and it imposes eventfd support as a dependency on the core vfio code, which it's really not. virqfd is intended to be provided as a service to vfio bus drivers, so instead of wrapping it into vfio.ko, we can make it a stand-alone module toggled by vfio bus drivers. This has the additional benefit of removing initialization and exit from the core vfio code. Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 683b5146022e..cbed15f194e0 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -142,8 +142,6 @@ struct virqfd { struct virqfd **pvirqfd; }; -extern int vfio_virqfd_init(void); -extern void vfio_virqfd_exit(void); extern int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), -- cgit v1.2.3 From 9439ce00f208d95703a6725e4ea986dd90e37ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:29 -0700 Subject: tcp: rename struct tcp_request_sock listener The listener field in struct tcp_request_sock is a pointer back to the listener. We now have req->rsk_listener, so TCP only needs one boolean and not a full pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 97dbf16f7d9d..f869ae8afbaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,7 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; - struct sock *listener; /* needed for TFO */ + bool tfo_listener; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ -- cgit v1.2.3 From a572460be9cfb423c60275943f7921003b8cd372 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 11 Mar 2015 12:30:58 -0300 Subject: dmaengine: imx-sdma: Add support for version 3 firmware Currently when version 3.1 of the mx6q SDMA firmware is used we get: [ 0.392169] imx-sdma 20ec000.sdma: unknown firmware version [ 0.399281] imx-sdma 20ec000.sdma: initialized Add support for it. Based on a patch from Shengjiu Wang from the internal FSL kernel. Signed-off-by: Fabio Estevam Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-imx-sdma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h index eabac4e2fc99..2d08816720f6 100644 --- a/include/linux/platform_data/dma-imx-sdma.h +++ b/include/linux/platform_data/dma-imx-sdma.h @@ -48,6 +48,9 @@ struct sdma_script_start_addrs { s32 ssish_2_mcu_addr; s32 hdmi_dma_addr; /* End of v2 array */ + s32 zcanfd_2_mcu_addr; + s32 zqspi_2_mcu_addr; + /* End of v3 array */ }; /** -- cgit v1.2.3 From 351d224f64afc1b3b359a1738b7d4600c7e64061 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 12 Mar 2015 17:17:58 +0100 Subject: of: base: add function to get highest id of an alias stem I2C supports adding adapters using either a dynamic or fixed id. The latter is provided by aliases in the DT case. To prevent id collisions of those two types, install this function which gives us the highest fixed id, so we can then let the dynamically created ones come after this highest number. Signed-off-by: Wolfram Sang Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index dfde07e77a63..9bfcc18ceab3 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -332,6 +332,7 @@ extern int of_count_phandle_with_args(const struct device_node *np, extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); +extern int of_alias_get_highest_id(const char *stem); extern int of_machine_is_compatible(const char *compat); @@ -594,6 +595,11 @@ static inline int of_alias_get_id(struct device_node *np, const char *stem) return -ENOSYS; } +static inline int of_alias_get_highest_id(const char *stem) +{ + return -ENOSYS; +} + static inline int of_machine_is_compatible(const char *compat) { return 0; -- cgit v1.2.3 From 431d452af13720463dda498999b2e9a08729c03a Mon Sep 17 00:00:00 2001 From: Zhonghui Fu Date: Wed, 18 Mar 2015 15:54:27 +0100 Subject: PM / sleep: add pm-trace support for suspending phase Occasionally, the system can't come back up after suspend/resume due to problems of device suspending phase. This patch make PM_TRACE infrastructure cover device suspending phase of suspend/resume process, and the information in RTC can tell developers which device suspending function make system hang. Signed-off-by: Zhonghui Fu Signed-off-by: Rafael J. Wysocki --- include/linux/pm-trace.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/resume-trace.h | 34 ---------------------------------- 2 files changed, 35 insertions(+), 34 deletions(-) create mode 100644 include/linux/pm-trace.h delete mode 100644 include/linux/resume-trace.h (limited to 'include/linux') diff --git a/include/linux/pm-trace.h b/include/linux/pm-trace.h new file mode 100644 index 000000000000..ecbde7a5548e --- /dev/null +++ b/include/linux/pm-trace.h @@ -0,0 +1,35 @@ +#ifndef PM_TRACE_H +#define PM_TRACE_H + +#ifdef CONFIG_PM_TRACE +#include +#include + +extern int pm_trace_enabled; + +static inline int pm_trace_is_enabled(void) +{ + return pm_trace_enabled; +} + +struct device; +extern void set_trace_device(struct device *); +extern void generate_pm_trace(const void *tracedata, unsigned int user); +extern int show_trace_dev_match(char *buf, size_t size); + +#define TRACE_DEVICE(dev) do { \ + if (pm_trace_enabled) \ + set_trace_device(dev); \ + } while(0) + +#else + +static inline int pm_trace_is_enabled(void) { return 0; } + +#define TRACE_DEVICE(dev) do { } while (0) +#define TRACE_RESUME(dev) do { } while (0) +#define TRACE_SUSPEND(dev) do { } while (0) + +#endif + +#endif diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h deleted file mode 100644 index f31db2368782..000000000000 --- a/include/linux/resume-trace.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef RESUME_TRACE_H -#define RESUME_TRACE_H - -#ifdef CONFIG_PM_TRACE -#include -#include - -extern int pm_trace_enabled; - -static inline int pm_trace_is_enabled(void) -{ - return pm_trace_enabled; -} - -struct device; -extern void set_trace_device(struct device *); -extern void generate_resume_trace(const void *tracedata, unsigned int user); -extern int show_trace_dev_match(char *buf, size_t size); - -#define TRACE_DEVICE(dev) do { \ - if (pm_trace_enabled) \ - set_trace_device(dev); \ - } while(0) - -#else - -static inline int pm_trace_is_enabled(void) { return 0; } - -#define TRACE_DEVICE(dev) do { } while (0) -#define TRACE_RESUME(dev) do { } while (0) - -#endif - -#endif -- cgit v1.2.3 From 1f874edcb7318c5dd71025df9f3849715b4e4f71 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 11 Feb 2015 12:44:45 +0800 Subject: usb: chipidea: add runtime power management support Add runtime power management support. Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 535997a6681b..39ba00f0d1d5 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -19,6 +19,7 @@ struct ci_hdrc_platform_data { enum usb_phy_interface phy_mode; unsigned long flags; #define CI_HDRC_REGS_SHARED BIT(0) +#define CI_HDRC_SUPPORTS_RUNTIME_PM BIT(2) #define CI_HDRC_DISABLE_STREAMING BIT(3) /* * Only set it when DCCPARAMS.DC==1 and DCCPARAMS.HC==1, -- cgit v1.2.3 From 6adb9b7b5fb64be3c3e4d57578ea1446da91a8f9 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 11 Feb 2015 12:45:01 +0800 Subject: usb: chipidea: add a flag for turn on vbus early for host Some usb PHYs need power supply from vbus to make it work, eg mxs-phy, if there is no vbus, USB PHY will not in correct state when the controller starts to work, for host, this requires vbus should be turned on before setting port power(PP) of ehci, to work with this kind of USB PHY design, this patch adds a flag CI_HDRC_TURN_VBUS_EARLY_ON, can be checked by host driver to turn on vbus while start host. Signed-off-by: Li Jun Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 39ba00f0d1d5..ab94f78c4dd1 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -28,6 +28,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_DUAL_ROLE_NOT_OTG BIT(4) #define CI_HDRC_IMX28_WRITE_FIX BIT(5) #define CI_HDRC_FORCE_FULLSPEED BIT(6) +#define CI_HDRC_TURN_VBUS_EARLY_ON BIT(7) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit v1.2.3 From c5b68807c6563bf3882c94268ca09198d2e574ae Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 15 Mar 2015 18:03:00 +0100 Subject: uwb: Remove umc bus legacy suspend/resume support There are currently no umc drivers implementing suspend/resume, so remove the legacy suspend/resume support from the framework. If a umc driver ever wants to implement suspend/resume they can use dev_pm_ops, which works out of the box without any additional support necessary from the bus itself. Signed-off-by: Lars-Peter Clausen Signed-off-by: Greg Kroah-Hartman --- include/linux/uwb/umc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h index ba82f03d8287..02112299a1d3 100644 --- a/include/linux/uwb/umc.h +++ b/include/linux/uwb/umc.h @@ -87,8 +87,6 @@ struct umc_driver { int (*probe)(struct umc_dev *); void (*remove)(struct umc_dev *); - int (*suspend)(struct umc_dev *, pm_message_t state); - int (*resume)(struct umc_dev *); int (*pre_reset)(struct umc_dev *); int (*post_reset)(struct umc_dev *); -- cgit v1.2.3 From 32fc9eb5b2dcebefb7fb4c8288c739c320362524 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 13 Mar 2015 11:09:42 -0700 Subject: usb: phy: msm: Remove dead code This code is no longer used now that mach-msm has been removed. Delete it. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: David Brown Cc: Bryan Huntsman Cc: Daniel Walker Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/msm_hsusb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index b0a39243295a..7dbecf9a4656 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -117,8 +117,6 @@ struct msm_otg_platform_data { enum otg_control_type otg_control; enum msm_usb_phy_type phy_type; void (*setup_gpio)(enum usb_otg_state state); - int (*link_clk_reset)(struct clk *link_clk, bool assert); - int (*phy_clk_reset)(struct clk *phy_clk); }; /** @@ -128,7 +126,6 @@ struct msm_otg_platform_data { * @irq: IRQ number assigned for HSUSB controller. * @clk: clock struct of usb_hs_clk. * @pclk: clock struct of usb_hs_pclk. - * @phy_reset_clk: clock struct of usb_phy_clk. * @core_clk: clock struct of usb_hs_core_clk. * @regs: ioremapped register base address. * @inputs: OTG state machine inputs(Id, SessValid etc). @@ -148,7 +145,6 @@ struct msm_otg { int irq; struct clk *clk; struct clk *pclk; - struct clk *phy_reset_clk; struct clk *core_clk; void __iomem *regs; #define ID 0 -- cgit v1.2.3 From 6aebd940840a4d3a0a8ffc5883d3892f4bd61e90 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:15 +1100 Subject: rhashtable: Remove shift from bucket_table Keeping both size and shift is silly. We only need one. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1695378b3c5b..f16e85692959 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -51,7 +51,6 @@ struct rhash_head { * @size: Number of hash buckets * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers @@ -63,7 +62,6 @@ struct bucket_table { unsigned int size; unsigned int rehash; u32 hash_rnd; - u32 shift; unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; -- cgit v1.2.3 From c2e213cff701fce71a0aba8de82f2c2a4acf52ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:16 +1100 Subject: rhashtable: Introduce max_size/min_size This patch adds the parameters max_size and min_size which are meant to replace max_shift and min_shift. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f16e85692959..81267fef85d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -85,6 +85,8 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key @@ -97,6 +99,8 @@ struct rhashtable_params { size_t head_offset; size_t max_shift; size_t min_shift; + unsigned int max_size; + unsigned int min_size; u32 nulls_base; size_t locks_mul; rht_hashfn_t hashfn; -- cgit v1.2.3 From e2e21c1c5808e5dfd88d3606cd6386cf85f6f5b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:21 +1100 Subject: rhashtable: Remove max_shift and min_shift Now that nobody uses max_shift and min_shift, we can safely remove them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 81267fef85d7..99425f2be708 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -83,8 +83,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @max_shift: Maximum number of shifts while expanding - * @min_shift: Minimum number of shifts while shrinking * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -97,8 +95,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - size_t max_shift; - size_t min_shift; unsigned int max_size; unsigned int min_size; u32 nulls_base; -- cgit v1.2.3 From f0b7d43c8a28155f50adb087a563cfc97566e477 Mon Sep 17 00:00:00 2001 From: Brad Campbell Date: Tue, 17 Mar 2015 16:25:46 -0400 Subject: cc2520: Add support for CC2591 amplifier. The TI CC2521 is an RF power amplifier that is designed to interface with the CC2520. Conveniently, it directly interfaces with the CC2520 and does not require any pins to be connected to a microcontroller/processor. Adding a CC2591 increases the CC2520's range, which is useful for border router and other wall-powered applications. Using the CC2591 with the CC2520 requires configuring the CC2520 GPIOs that are connected to the CC2591 to correctly set the CC2591 into TX and RX modes. Further, TI recommends that the CC2520_TXPOWER and CC2520_AGCCTRL1 registers are set differently to maximize the CC2591's performance. These settings are covered in TI Application Note AN065. This patch adds an optional `amplified` field to the cc2520 entry in the device tree. If present, the CC2520 will be configured to operate with a CC2591. The expected pin mapping is: CC2520 GPIO0 --> CC2591 EN CC2520 GPIO5 --> CC2591 PAEN Signed-off-by: Brad Campbell Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/linux/spi/cc2520.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index 85b8ee67e937..e741e8baad92 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,6 +21,7 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; + bool amplified; }; #endif -- cgit v1.2.3 From 822b3b2ebfff8e9b3d006086c527738a7ca00cd0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Mar 2015 14:57:33 +0200 Subject: net: Add max rate tx queue attribute This adds a tx_maxrate attribute to the tx queue sysfs entry allowing for max-rate limiting. Along with DCB-ETS and BQL this provides another knob to tune queue performance. The limit units are Mbps. By default it is disabled. To disable the rate limitation after it has been set for a queue, it should be set to zero. Signed-off-by: John Fastabend Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd1d069758be..76c5de4978a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -587,6 +587,7 @@ struct netdev_queue { #ifdef CONFIG_BQL struct dql dql; #endif + unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1022,6 +1023,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. + * int (*ndo_set_tx_maxrate)(struct net_device *dev, + * int queue_index, u32 maxrate); + * Called when a user wants to set a max-rate limitation of specific + * TX queue. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1178,6 +1183,9 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); + int (*ndo_set_tx_maxrate)(struct net_device *dev, + int queue_index, + u32 maxrate); }; /** -- cgit v1.2.3 From fc31e2560a2443410fe45c27116fae736541a7b5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Mar 2015 14:57:34 +0200 Subject: net/mlx4_core: Add basic support for QP max-rate limiting Add the low-level device commands and definitions used for QP max-rate limiting. This is done through the following elements: - read rate-limit device caps in QUERY_DEV_CAP: number of different rates and the min/max rates in Kbs/Mbs/Gbs units - enhance the QP context struct to contain rate limit units and value - allow to do run time rate-limit setting to QPs through the update-qp firmware command - QP rate-limiting is disallowed for VFs Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 17 +++++++++++++++++ include/linux/mlx4/qp.h | 14 ++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1cc54822b931..4550c67b92e4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,6 +205,7 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 }; enum { @@ -450,6 +451,21 @@ enum mlx4_module_id { MLX4_MODULE_ID_QSFP28 = 0x11, }; +enum { /* rl */ + MLX4_QP_RATE_LIMIT_NONE = 0, + MLX4_QP_RATE_LIMIT_KBS = 1, + MLX4_QP_RATE_LIMIT_MBS = 2, + MLX4_QP_RATE_LIMIT_GBS = 3 +}; + +struct mlx4_rate_limit_caps { + u16 num_rates; /* Number of different rates */ + u8 min_unit; + u16 min_val; + u8 max_unit; + u16 max_val; +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -565,6 +581,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + struct mlx4_rate_limit_caps rl_caps; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 551f85456c11..1023ebe035b7 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -207,14 +207,16 @@ struct mlx4_qp_context { __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; - u32 reserved3[2]; + u32 reserved3; + __be16 rate_limit_params; + __be16 reserved4; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; - u8 reserved4[2]; + u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; - u32 reserved5[10]; + u32 reserved6[10]; }; struct mlx4_update_qp_context { @@ -229,6 +231,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; enum { @@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 + MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 }; enum mlx4_update_qp_params_flags { @@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; u32 flags; + u16 rate_unit; + u16 rate_val; }; int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, -- cgit v1.2.3 From 847aac644e92e5624f2c153bab409bf713d5ff9a Mon Sep 17 00:00:00 2001 From: Li Xi Date: Thu, 19 Mar 2015 04:04:53 +0900 Subject: vfs: Add general support to enforce project quota limits This patch adds support for a new quota type PRJQUOTA for project quota enforcement. Also a new method get_projid() is added into dquot_operations structure. Signed-off-by: Li Xi Signed-off-by: Dmitry Monakhov Reviewed-by: Jan Kara Signed-off-by: Jan Kara --- include/linux/quota.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index cf910d1f8efa..b2505acfd3c0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -50,6 +50,7 @@ #undef USRQUOTA #undef GRPQUOTA +#undef PRJQUOTA enum quota_type { USRQUOTA = 0, /* element used for user quotas */ GRPQUOTA = 1, /* element used for group quotas */ @@ -319,6 +320,7 @@ struct dquot_operations { /* get reserved quota for delayed alloc, value returned is managed by * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); + int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */ }; struct path; -- cgit v1.2.3 From 95c0fd457b03099b5ce385f3982e06d01ad42c5c Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Mon, 16 Mar 2015 21:46:30 +0100 Subject: PNP: Add helper macro for pnp_register_driver boilerplate This patch introduces the module_pnp_driver macro which is a convenience macro for PNP driver modules similar to module_pci_driver. It is intended to be used by drivers which init/exit section does nothing but register/unregister the PNP driver. By using this macro it is possible to eliminate a few lines of boilerplate code per PNP driver. Based on work done by Lars-Peter Clausen for other busses (i2c and spi) and Greg KH for PCI. Signed-off-by: Peter Huewe Signed-off-by: Rafael J. Wysocki --- include/linux/pnp.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 6512e9cbc6d5..5df733b8f704 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -510,4 +510,16 @@ static inline void pnp_unregister_driver(struct pnp_driver *drv) { } #endif /* CONFIG_PNP */ +/** + * module_pnp_driver() - Helper macro for registering a PnP driver + * @__pnp_driver: pnp_driver struct + * + * Helper macro for PnP drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit() + */ +#define module_pnp_driver(__pnp_driver) \ + module_driver(__pnp_driver, pnp_register_driver, \ + pnp_unregister_driver) + #endif /* _LINUX_PNP_H */ -- cgit v1.2.3 From bf38b8710892333cec2d8069644eb36ff435fd6f Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 8 Mar 2015 11:17:14 +0100 Subject: tpm/tpm_i2c_stm_st33: Split tpm_i2c_tpm_st33 in 2 layers (core + phy) tpm_i2c_stm_st33 is a TIS 1.2 TPM with a core interface which can be used by different phy such as i2c or spi. The core part is called st33zp24 which is also the main part reference. include/linux/platform_data/tpm_stm_st33.h is renamed consequently. The driver is also split into an i2c phy in charge of sending/receiving data as well as managing platform data or dts configuration. Acked-by: Jarkko Sakkinen Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- include/linux/platform_data/st33zp24.h | 28 +++++++++++++++++++++ include/linux/platform_data/tpm_stm_st33.h | 39 ------------------------------ 2 files changed, 28 insertions(+), 39 deletions(-) create mode 100644 include/linux/platform_data/st33zp24.h delete mode 100644 include/linux/platform_data/tpm_stm_st33.h (limited to 'include/linux') diff --git a/include/linux/platform_data/st33zp24.h b/include/linux/platform_data/st33zp24.h new file mode 100644 index 000000000000..817dfdb37885 --- /dev/null +++ b/include/linux/platform_data/st33zp24.h @@ -0,0 +1,28 @@ +/* + * STMicroelectronics TPM Linux driver for TPM 1.2 ST33ZP24 + * Copyright (C) 2009 - 2015 STMicroelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef __ST33ZP24_H__ +#define __ST33ZP24_H__ + +#define TPM_ST33_I2C "st33zp24-i2c" +#define TPM_ST33_SPI "st33zp24-spi" + +struct st33zp24_platform_data { + int io_lpcpd; +}; + +#endif /* __ST33ZP24_H__ */ diff --git a/include/linux/platform_data/tpm_stm_st33.h b/include/linux/platform_data/tpm_stm_st33.h deleted file mode 100644 index ff75310c0f47..000000000000 --- a/include/linux/platform_data/tpm_stm_st33.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * STMicroelectronics TPM I2C Linux driver for TPM ST33ZP24 - * Copyright (C) 2009, 2010 STMicroelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * STMicroelectronics version 1.2.0, Copyright (C) 2010 - * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. - * This is free software, and you are welcome to redistribute it - * under certain conditions. - * - * @Author: Christophe RICARD tpmsupport@st.com - * - * @File: stm_st33_tpm.h - * - * @Date: 09/15/2010 - */ -#ifndef __STM_ST33_TPM_H__ -#define __STM_ST33_TPM_H__ - -#define TPM_ST33_I2C "st33zp24-i2c" -#define TPM_ST33_SPI "st33zp24-spi" - -struct st33zp24_platform_data { - int io_lpcpd; -}; - -#endif /* __STM_ST33_TPM_H__ */ -- cgit v1.2.3 From 6eada0110c8984477f5f1e57a0b7f7b2fc841e30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2015 14:05:33 -0700 Subject: netns: constify net_hash_mix() and various callers const qualifiers ease code review by making clear which objects are not written in a function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..87c094961bd5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,7 +34,7 @@ static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) -static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask) +static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { return (num + net_hash_mix(net)) & mask; } -- cgit v1.2.3 From 54ff9ef36bdf84d469a098cbf8e2a103fbc77054 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 18 Mar 2015 14:50:43 -0300 Subject: ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop} in favor of their inner __ ones, which doesn't grab rtnl. As these functions need to operate on a locked socket, we can't be grabbing rtnl by then. It's too late and doing so causes reversed locking. So this patch: - move rtnl handling to callers instead while already fixing some reversed locking situations, like on vxlan and ipvs code. - renames __ ones to not have the __ mark: __ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group __ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop} Signed-off-by: Marcelo Ricardo Leitner Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b5a6470e686c..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,9 +111,7 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); -extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); -extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, -- cgit v1.2.3 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76c5de4978a8..ec8f9b5f6500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,6 +1164,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_phys_port_name)(struct net_device *dev, + char *name, size_t len); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit v1.2.3 From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Mar 2015 22:52:33 -0400 Subject: net: Fix high overhead of vlan sub-device teardown. When a networking device is taken down that has a non-trivial number of VLAN devices configured under it, we eat a full synchronize_net() for every such VLAN device. This is because of the call chain: NETDEV_DOWN notifier --> vlan_device_event() --> dev_change_flags() --> __dev_change_flags() --> __dev_close() --> __dev_close_many() --> dev_deactivate_many() --> synchronize_net() This is kind of rediculous because we already have infrastructure for batching doing operation X to a list of net devices so that we only incur one sync. So make use of that by exporting dev_close_many() and adjusting it's interfaace so that the caller can fully manage the batch list. Use this in vlan_device_event() and all the overhead goes away. Reported-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec8f9b5f6500..76951c5fbedf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2156,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); +int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); -- cgit v1.2.3 From 964cb341882f920a1a1043864178f22def3193e4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 18 Mar 2015 01:56:17 +0100 Subject: gpio: move pincontrol calls to These functions do not belong in since the split into separate GPIO headers under . Move them to as is apropriate. Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c497c62889d1..f1b36593ec9f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -6,6 +6,7 @@ #include #include #include +#include struct device; struct gpio_desc; @@ -173,6 +174,53 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIOLIB_IRQCHIP */ +#ifdef CONFIG_PINCTRL + +/** + * struct gpio_pin_range - pin range controlled by a gpio chip + * @head: list for maintaining set of pin ranges, used internally + * @pctldev: pinctrl device which handles corresponding pins + * @range: actual range of pins controlled by a gpio controller + */ + +struct gpio_pin_range { + struct list_head node; + struct pinctrl_dev *pctldev; + struct pinctrl_gpio_range range; +}; + +int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, + unsigned int gpio_offset, unsigned int pin_offset, + unsigned int npins); +int gpiochip_add_pingroup_range(struct gpio_chip *chip, + struct pinctrl_dev *pctldev, + unsigned int gpio_offset, const char *pin_group); +void gpiochip_remove_pin_ranges(struct gpio_chip *chip); + +#else + +static inline int +gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, + unsigned int gpio_offset, unsigned int pin_offset, + unsigned int npins) +{ + return 0; +} +static inline int +gpiochip_add_pingroup_range(struct gpio_chip *chip, + struct pinctrl_dev *pctldev, + unsigned int gpio_offset, const char *pin_group) +{ + return 0; +} + +static inline void +gpiochip_remove_pin_ranges(struct gpio_chip *chip) +{ +} + +#endif /* CONFIG_PINCTRL */ + struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, const char *label); void gpiochip_free_own_desc(struct gpio_desc *desc); -- cgit v1.2.3 From dfea9c94837d27e38c8cc85a3c1c7c268973c3c2 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 6 Mar 2015 10:36:02 +0800 Subject: usb: udc: store usb_udc pointer in struct usb_gadget Instead of iterate to find usb_udc according to usb_gadget, this way is easier. Alan Stern suggests this way too: http://marc.info/?l=linux-usb&m=142168496528894&w=2 Acked-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 02476b3a1109..242c694a5b85 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -474,6 +474,7 @@ struct usb_dcd_config_params { struct usb_gadget; struct usb_gadget_driver; +struct usb_udc; /* the rest of the api to the controller hardware: device operations, * which don't involve endpoints (or i/o). @@ -496,6 +497,7 @@ struct usb_gadget_ops { /** * struct usb_gadget - represents a usb slave device * @work: (internal use) Workqueue to be used for sysfs_notify() + * @udc: struct usb_udc pointer for this gadget * @ops: Function pointers used to access hardware-specific operations. * @ep0: Endpoint zero, used when reading or writing responses to * driver setup() requests @@ -545,6 +547,7 @@ struct usb_gadget_ops { */ struct usb_gadget { struct work_struct work; + struct usb_udc *udc; /* readonly to gadget driver */ const struct usb_gadget_ops *ops; struct usb_ep *ep0; -- cgit v1.2.3 From 628ef0d273a69d889669a459fb4675c678ae0418 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 6 Mar 2015 10:36:03 +0800 Subject: usb: udc: add usb_udc_vbus_handler This commit updates udc core vbus status, and try to connect or disconnect gadget. Signed-off-by: Peter Chen Acked-by: Alan Stern Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 242c694a5b85..4f3dfb7d0654 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1032,6 +1032,10 @@ extern void usb_gadget_udc_reset(struct usb_gadget *gadget, extern void usb_gadget_giveback_request(struct usb_ep *ep, struct usb_request *req); +/*-------------------------------------------------------------------------*/ + +/* utility to update vbus status for udc core, it may be scheduled */ +extern void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status); /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From 3fa0818b3c85e9bb55e3ac96c9523b87e44eab9e Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 9 Mar 2015 12:12:07 -0400 Subject: sched, isolcpu: make cpu_isolated_map visible outside scheduler Needed by the next patch. Also makes cpu_isolated_map present when compiled without SMP and/or with CONFIG_NR_CPUS=1, like the other cpu masks. At some point we may want to clean things up so cpumasks do not exist in UP kernels. Maybe something for the CONFIG_TINY crowd. Cc: Peter Zijlstra Cc: Clark Williams Cc: Li Zefan Cc: Ingo Molnar Cc: Luiz Capitulino Cc: David Rientjes Cc: Mike Galbraith Cc: cgroups@vger.kernel.org Signed-off-by: Rik van Riel Acked-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..ca365d79480c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -329,6 +329,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern cpumask_var_t cpu_isolated_map; + extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -- cgit v1.2.3 From 6f921fab5844941f7605b7f1a265f5fc7fe969a7 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 18 Mar 2015 18:38:25 +0200 Subject: wlcore: set irq_trigger in board files instead of hiding behind a quirk The platform_quirk element in the platform data was used to change the way the IRQ is triggered. When set, the EDGE_IRQ quirk would change the irqflags used and treat edge trigger differently from the rest. Instead of hiding this irq flag setting behind the quirk, have the board files set the irq_trigger explicitly. This will allow us to use standard irq DT definitions later on. Signed-off-by: Luciano Coelho [Eliad - rebase, add irq_trigger field and pass it, update board file changes] Signed-off-by: Eliad Peller Tested-by: Nikita Kiryanov Acked-by: Kalle Valo Acked-by: Sekhar Nori Signed-off-by: Tony Lindgren --- include/linux/wl12xx.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index a9c723be1acf..3876b67dbcbc 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -57,15 +57,12 @@ struct wl1251_platform_data { struct wl12xx_platform_data { int irq; + u32 irq_trigger; int board_ref_clock; int board_tcxo_clock; - unsigned long platform_quirks; bool pwr_in_suspend; }; -/* Platform does not support level trigger interrupts */ -#define WL12XX_PLATFORM_QUIRK_EDGE_IRQ BIT(0) - #ifdef CONFIG_WILINK_PLATFORM_DATA int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); -- cgit v1.2.3 From 25911556283e5093fca235d7ba03faae7ed5dbf2 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 18 Mar 2015 13:25:26 +0100 Subject: brcmfmac: add support for BCM43430 SDIO chipset This patch added support for the BCM43430 802.11n SDIO chipset. Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 91397858dc95..83430f2ea757 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 -- cgit v1.2.3 From 488fb86ee91d3b1182c2e30a9f9b45da14eda46f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:56:59 +1100 Subject: rhashtable: Make rhashtable_init params argument const This patch marks the rhashtable_init params argument const as there is no reason to modify it since we will always make a copy of it in the rhashtable. This patch also fixes a bug where we don't actually round up the value of min_size unless it is less than HASH_MIN_SIZE. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99425f2be708..c85363c45fc0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -181,7 +181,8 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -- cgit v1.2.3 From 02fd97c3d4a8a14e222b0021c366db7041d28743 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:00 +1100 Subject: rhashtable: Allow hash/comparison functions to be inlined This patch deals with the complaint that we make indirect function calls on the fast paths unnecessarily in rhashtable. We resolve it by moving the fast paths into inline functions that take struct rhashtable_param (which obviously must be the same set of parameters supplied to rhashtable_init) as an argument. The only remaining indirect call is to obj_hashfn (or key_hashfn it obj_hashfn is unset) on the rehash as well as the insert-during- rehash slow path. This patch also extends the support of vairable-length keys to include those where the key is fixed but scattered in the object. For example, in netlink we want to key off the namespace and the portid but they're not next to each other. This patch does this by directly using the object hash function as the indicator of whether the key is accessible or not. It also adds a new function obj_cmpfn to compare a key against an object. This means that the caller no longer needs to supply explicit compare functions. All this is done in a backwards compatible manner so no existing users are affected until they convert to the new interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 386 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c85363c45fc0..a7188eeb135b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -22,6 +22,7 @@ #include #include #include +#include /* * The end of the chain is marked with a special nulls marks which has @@ -42,6 +43,9 @@ #define RHT_HASH_BITS 27 #define RHT_BASE_SHIFT RHT_HASH_BITS +/* Base bits plus 1 bit for nulls marker */ +#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) + struct rhash_head { struct rhash_head __rcu *next; }; @@ -72,8 +76,20 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); struct rhashtable; @@ -89,6 +105,7 @@ struct rhashtable; * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object */ struct rhashtable_params { size_t nelem_hint; @@ -101,6 +118,7 @@ struct rhashtable_params { size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; }; /** @@ -165,6 +183,83 @@ static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) return ((unsigned long) ptr) >> 1; } +static inline void *rht_obj(const struct rhashtable *ht, + const struct rhash_head *he) +{ + return (char *)he - ht->p.head_offset; +} + +static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, + unsigned int hash) +{ + return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)); +} + +static inline unsigned int rht_head_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const struct rhash_head *he, const struct rhashtable_params params) +{ + const char *ptr = rht_obj(ht, he); + + return likely(params.obj_hashfn) ? + rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_75(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Expand table when exceeding 75% load */ + return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && + (!ht->p.max_size || tbl->size < ht->p.max_size); +} + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_shrink_below_30(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Shrink table beneath 30% load */ + return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && + tbl->size > ht->p.min_size; +} + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * A maximum of tbl->size/2 bucket locks is allocated. This ensures that + * a single lock always covers both buckets which may both contains + * entries which link to the same bucket of the old table during resizing. + * This allows to simplify the locking as locking the bucket in both + * tables during resize always guarantee protection. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, + unsigned int hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} + #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -184,6 +279,9 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *old_tbl); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); @@ -356,4 +454,292 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + const char *ptr = obj; + + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); +} + +/** + * rhashtable_lookup_fast - search hash table, inlined version + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + const struct bucket_table *tbl; + struct rhash_head *he; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, key, params); + rht_for_each_rcu(he, tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + rcu_read_unlock(); + return rht_obj(ht, he); + } + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); + + return NULL; +} + +static inline int __rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + int err = -EEXIST; + struct bucket_table *tbl, *new_tbl; + struct rhash_head *head; + spinlock_t *lock; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + /* Because we have already taken the bucket lock in tbl, + * if we find that future_tbl is not yet visible then + * that guarantees all other insertions of the same entry + * will also grab the bucket lock in tbl because until + * the rehash completes ht->tbl won't be changed. + */ + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(new_tbl)) { + err = rhashtable_insert_slow(ht, key, obj, new_tbl); + goto out; + } + + if (!key) + goto skip_lookup; + + rht_for_each(head, tbl, hash) { + if (unlikely(!(params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head))))) + goto out; + } + +skip_lookup: + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); + +out: + spin_unlock_bh(lock); + rcu_read_unlock(); + + return err; +} + +/** + * rhashtable_insert_fast - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_insert_fast(ht, NULL, obj, params); +} + +/** + * rhashtable_lookup_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_lookup_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, + params); +} + +/** + * rhashtable_lookup_insert_key - search and insert object to hash table + * with explicit key + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + * + * Returns zero on success. + */ +static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params); +} + +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj, const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t * lock; + unsigned hash; + int err = -ENOENT; + + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(*pprev, obj->next); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + if (err) + goto out; + + atomic_dec(&ht->nelems); + if (rht_shrink_below_30(ht, tbl)) + schedule_work(&ht->run_work); + +out: + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From dc0ee268d85026530720d8c874716287b7ede25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:06 +1100 Subject: rhashtable: Rip out obsolete out-of-line interface Now that all rhashtable users have been converted over to the inline interface, this patch removes the unused out-of-line interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a7188eeb135b..c3034de2c235 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,14 +1,13 @@ /* * Resizable, Scalable, Concurrent Hash Table * + * Copyright (c) 2015 Herbert Xu * Copyright (c) 2014 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * - * Based on the following paper by Josh Triplett, Paul E. McKenney - * and Jonathan Walpole: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -282,22 +281,10 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), void *arg); - -bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); -bool rhashtable_lookup_compare_insert(struct rhashtable *ht, - struct rhash_head *obj, - bool (*compare)(void *, void *), - void *arg); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); -- cgit v1.2.3 From 6626af692692b52c8f9e20ad8201a3255e5ab25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 18:18:45 -0400 Subject: rhashtable: Fix undeclared EEXIST build error on ia64 We need to include linux/errno.h in rhashtable.h since it doesn't always get included otherwise. Reported-by: kbuild test robot Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c3034de2c235..89d102270570 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #define _LINUX_RHASHTABLE_H #include +#include #include #include #include -- cgit v1.2.3 From 14aef2919d06bd132f2e6f74f5ccb3caa4c92d54 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 20 Mar 2015 07:53:06 +0800 Subject: regulator: stw481x: Remove unused fields from struct stw481x The mutex lock is not used at all, remove it. The *vmmc_regulator is not necessary, use a local variable in stw481x_vmmc_regulator_probe() instead. Signed-off-by: Axel Lin Reviewed-by: Linus Walleij Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/stw481x.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/stw481x.h b/include/linux/mfd/stw481x.h index eda121556e5d..833074b766bd 100644 --- a/include/linux/mfd/stw481x.h +++ b/include/linux/mfd/stw481x.h @@ -41,15 +41,11 @@ /** * struct stw481x - state holder for the Stw481x drivers - * @mutex: mutex to serialize I2C accesses * @i2c_client: corresponding I2C client - * @regulator: regulator device for regulator children * @map: regmap handle to access device registers */ struct stw481x { - struct mutex lock; struct i2c_client *client; - struct regulator_dev *vmmc_regulator; struct regmap *map; }; -- cgit v1.2.3 From d3593b5cef76db45c864de23c599b58198879e8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Mar 2015 17:15:19 -0700 Subject: Revert "selinux: add a skb_owned_by() hook" This reverts commit ca10b9e9a8ca7342ee07065289cbe74ac128c169. No longer needed after commit eb8895debe1baba41fcb62c78a16f0c63c21662a ("tcp: tcp_make_synack() should use sock_wmalloc") When under SYNFLOOD, we build lot of SYNACK and hit false sharing because of multiple modifications done on sk_listener->sk_wmem_alloc Since tcp_make_synack() uses sock_wmalloc(), there is no need to call skb_set_owner_w() again, as this adds two atomic operations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/security.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..25a079a7c3b3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1716,7 +1716,6 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); - void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2735,8 +2734,6 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); - #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2928,11 +2925,6 @@ static inline int security_tun_dev_open(void *security) { return 0; } - -static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ -} - #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -- cgit v1.2.3 From 8d0451638ad3f7ccd5250c1dd90e06ad487b2703 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2015 20:55:31 +0100 Subject: netfilter: bridge: kill nf_bridge_pad The br_netfilter frag output function calls skb_cow_head() so in case it needs a larger headroom to e.g. re-add a previously stripped PPPOE or VLAN header things will still work (at cost of reallocation). We can then move nf_bridge_encap_header_len to br_netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ed0d3bf953c3..2734977199ca 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -23,18 +23,6 @@ enum nf_br_hook_priorities { #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) -{ - switch (skb->protocol) { - case __cpu_to_be16(ETH_P_8021Q): - return VLAN_HLEN; - case __cpu_to_be16(ETH_P_PPP_SES): - return PPPOE_SES_HLEN; - default: - return 0; - } -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -44,15 +32,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb); -/* This is called by the IP fragmenting code and it ensures there is - * enough room for the encapsulating header (if there is one). */ -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) -{ - if (skb->nf_bridge) - return nf_bridge_encap_header_len(skb); - return 0; -} - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -62,7 +41,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From e90d5532773e2bcccc538dd346b9fc3482cd700c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 20 Mar 2015 13:59:27 +0100 Subject: driver core / PM: Add PM domain callbacks for device setup/cleanup If PM domains are in use, it may be necessary to prepare the code handling a PM domain for driver probing. For example, in some cases device drivers rely on the ability to power on the devices with the help of the IO runtime PM framework and the PM domain code needs to be ready for that. Also, if that code has not been fully initialized yet, the driver probing should be deferred. Moreover, after the probing is complete, it may be necessary to put the PM domain in question into the state reflecting the current needs of the devices in it, for example, so that power is not drawn in vain. The same should be done after removing a driver from a device, as the PM domain state may need to be changed to reflect the new situation. For these reasons, introduce new PM domain callbacks, ->activate, ->sync and ->dismiss called, respectively, before probing for a device driver, after the probing has completed successfully and if the probing has failed or the driver has been removed. Signed-off-by: Rafael J. Wysocki Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Acked-by: Greg Kroah-Hartman --- include/linux/pm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index e2f1be6dd9dd..2d29c64f8fb1 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -603,10 +603,18 @@ extern void dev_pm_put_subsys_data(struct device *dev); * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions along with * subsystem-level and driver-level callbacks. + * + * @detach: Called when removing a device from the domain. + * @activate: Called before executing probe routines for bus types and drivers. + * @sync: Called after successful driver probe. + * @dismiss: Called after unsuccessful driver probe and after driver removal. */ struct dev_pm_domain { struct dev_pm_ops ops; void (*detach)(struct device *dev, bool power_off); + int (*activate)(struct device *dev); + void (*sync)(struct device *dev); + void (*dismiss)(struct device *dev); }; /* -- cgit v1.2.3 From 446d999c1c92cec996e759dc3c03110596e626f5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 20 Mar 2015 17:20:33 +0000 Subject: PM / domains: factor out code to get the generic PM domain from a struct device The PM domain code contains two methods to get the generic PM domain for a struct device. One is dev_to_genpd() which is only safe when we know for certain that the device has a generic PM domain attached. The other is coded into genpd_dev_pm_detach() which ensures that the PM domain in the struct device is a generic PM domain (and so is safer). This commit factors out the safer version, documents it, and hides the unsafe dev_to_genpd(). Signed-off-by: Russell King Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 080e778118ba..681ccb053f72 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -127,7 +127,7 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct generic_pm_domain *dev_to_genpd(struct device *dev); +extern struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td); @@ -163,9 +163,9 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) { return ERR_PTR(-ENOSYS); } -static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) +static inline struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, -- cgit v1.2.3 From 2b290bbb60847c0897c047b5214192810de529df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 04:48:20 +0100 Subject: can: use sock_efree instead of own destructor It is identical to the can destructor. Signed-off-by: Florian Westphal Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index cc00d15c6107..b6a52a4b457a 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -44,16 +44,11 @@ static inline void can_skb_reserve(struct sk_buff *skb) skb_reserve(skb, sizeof(struct can_skb_priv)); } -static inline void can_skb_destructor(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { if (sk) { sock_hold(sk); - skb->destructor = can_skb_destructor; + skb->destructor = sock_efree; skb->sk = sk; } } -- cgit v1.2.3 From c54eb70e3bb8cd0d7b8564bedab63e834656c567 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Mon, 16 Mar 2015 09:38:13 +0100 Subject: can: add combined rx/tx LED trigger support Add -rxtx trigger, that will be activated both for tx as rx events. This trigger mimics "activity" LED for Ethernet devices. Signed-off-by: Yegor Yefremov Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 ++ include/linux/can/led.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c05ff0f9f9a5..c3a9c8fc60fa 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -61,6 +61,8 @@ struct can_priv { char tx_led_trig_name[CAN_LED_NAME_SZ]; struct led_trigger *rx_led_trig; char rx_led_trig_name[CAN_LED_NAME_SZ]; + struct led_trigger *rxtx_led_trig; + char rxtx_led_trig_name[CAN_LED_NAME_SZ]; #endif }; diff --git a/include/linux/can/led.h b/include/linux/can/led.h index e0475c5cbb92..146de4506d21 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -21,8 +21,10 @@ enum can_led_event { #ifdef CONFIG_CAN_LEDS -/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */ -#define CAN_LED_NAME_SZ (IFNAMSIZ + 4) +/* keep space for interface name + "-tx"/"-rx"/"-rxtx" + * suffix and null terminator + */ +#define CAN_LED_NAME_SZ (IFNAMSIZ + 6) void can_led_event(struct net_device *netdev, enum can_led_event event); void devm_can_led_init(struct net_device *netdev); -- cgit v1.2.3 From ae011d2e48d2e6c2ae29fd8aab439caf2fbfb5a8 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 6 Feb 2015 01:06:28 +0530 Subject: pstore: Add pstore type id for PPC64 opal nvram partition This patch adds a new PPC64 partition type to be used for opal specific nvram partition. A new partition type is needed as none of the existing type matches this partition type. Signed-off-by: Hari Bathini Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman --- include/linux/pstore.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 8884f6e507f7..8e7a25b068b0 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -40,6 +40,7 @@ enum pstore_type_id { PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_PPC_COMMON = 6, PSTORE_TYPE_PMSG = 7, + PSTORE_TYPE_PPC_OPAL = 8, PSTORE_TYPE_UNKNOWN = 255 }; -- cgit v1.2.3 From 71ad00d61ec861dc68b4544887729850e58cb99b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Mar 2015 10:18:51 -0400 Subject: irq_work: Fix build failure when CONFIG_IRQ_WORK is not defined When CONFIG_IRQ_WORK is not defined (difficult to do, as it also requires CONFIG_PRINTK not to be defined), we get a build failure: kernel/built-in.o: In function `flush_smp_call_function_queue': kernel/smp.c:263: undefined reference to `irq_work_run' kernel/smp.c:263: undefined reference to `irq_work_run' Makefile:933: recipe for target 'vmlinux' failed Simplest thing to do is to make irq_work_run() a nop when not set. Signed-off-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150319101851.4d224d9b@gandalf.local.home Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf3fe719c7ce..47b9ebd4a74f 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -38,16 +38,17 @@ bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); #endif -void irq_work_run(void); void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK #include +void irq_work_run(void); bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } +static inline void irq_work_run(void) { } #endif #endif /* _LINUX_IRQ_WORK_H */ -- cgit v1.2.3 From 50f16a8bf9d7a92c437ed1867d0f7e1dc6a9aca9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2015 22:10:19 +0100 Subject: perf: Remove type specific target pointers The only reason CQM had to use a hard-coded pmu type was so it could use cqm_target in hw_perf_event. Do away with the {tp,bp,cqm}_target pointers and provide a non type specific one. This allows us to do away with that silly pmu type as well. Signed-off-by: Peter Zijlstra (Intel) Cc: Vince Weaver Cc: acme@kernel.org Cc: acme@redhat.com Cc: hpa@zytor.com Cc: jolsa@redhat.com Cc: kanaka.d.juvva@intel.com Cc: matt.fleming@intel.com Cc: tglx@linutronix.de Cc: torvalds@linux-foundation.org Cc: vikas.shivappa@linux.intel.com Link: http://lkml.kernel.org/r/20150305211019.GU21418@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dac4c2831d82..5aa49d7bfd07 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -119,7 +119,6 @@ struct hw_perf_event { struct hrtimer hrtimer; }; struct { /* tracepoint */ - struct task_struct *tp_target; /* for tp_event->class */ struct list_head tp_list; }; @@ -129,7 +128,6 @@ struct hw_perf_event { struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; - struct task_struct *cqm_target; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ @@ -138,12 +136,12 @@ struct hw_perf_event { * problem hw_breakpoint has with context * creation and event initalization. */ - struct task_struct *bp_target; struct arch_hw_breakpoint info; struct list_head bp_list; }; #endif }; + struct task_struct *target; int state; local64_t prev_count; u64 sample_period; -- cgit v1.2.3 From 3bfa6f030a01870a43e2a0652437a20b59bc3412 Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Mon, 9 Feb 2015 16:06:28 -0800 Subject: mmc: sdhci: add quirk for ACMD23 broken Add quirk to handle broken auto-CMD23. Some controllers do not respond after the first auto-CMD23 is issued. This allows CMD23 to still work (mandatory for the faster UHS-I mode) rather than disabling CMD23 entirely via SDHCI_QUIRK2_HOST_NO_CMD23. Signed-off by: Corneliu Doban Signed-off-by: Scott Branden Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index c3e3db196738..1bafb1e7716e 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -115,6 +115,8 @@ struct sdhci_host { #define SDHCI_QUIRK2_TUNING_WORK_AROUND (1<<12) /* disable the block count for single block transactions */ #define SDHCI_QUIRK2_SUPPORT_SINGLE (1<<13) +/* Controller broken with using ACMD23 */ +#define SDHCI_QUIRK2_ACMD23_BROKEN (1<<14) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From 83f13cc9af9822cacc6644ee3c63c81f3930ddad Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 4 Mar 2015 10:19:14 +0100 Subject: mmc: sdhci: Remove the sdhci exported header file Since there no users of the struct sdhci_host, but the shdci host drivers themselves, let's move the definition of it to the local sdhci header. The exported sdhci header then becomes empty, so let's remove it. Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci.h | 220 ---------------------------------------------- 1 file changed, 220 deletions(-) delete mode 100644 include/linux/mmc/sdhci.h (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h deleted file mode 100644 index 1bafb1e7716e..000000000000 --- a/include/linux/mmc/sdhci.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * linux/include/linux/mmc/sdhci.h - Secure Digital Host Controller Interface - * - * Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ -#ifndef LINUX_MMC_SDHCI_H -#define LINUX_MMC_SDHCI_H - -#include -#include -#include -#include -#include - -struct sdhci_host_next { - unsigned int sg_count; - s32 cookie; -}; - -struct sdhci_host { - /* Data set by hardware interface driver */ - const char *hw_name; /* Hardware bus name */ - - unsigned int quirks; /* Deviations from spec. */ - -/* Controller doesn't honor resets unless we touch the clock register */ -#define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1<<0) -/* Controller has bad caps bits, but really supports DMA */ -#define SDHCI_QUIRK_FORCE_DMA (1<<1) -/* Controller doesn't like to be reset when there is no card inserted. */ -#define SDHCI_QUIRK_NO_CARD_NO_RESET (1<<2) -/* Controller doesn't like clearing the power reg before a change */ -#define SDHCI_QUIRK_SINGLE_POWER_WRITE (1<<3) -/* Controller has flaky internal state so reset it on each ios change */ -#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS (1<<4) -/* Controller has an unusable DMA engine */ -#define SDHCI_QUIRK_BROKEN_DMA (1<<5) -/* Controller has an unusable ADMA engine */ -#define SDHCI_QUIRK_BROKEN_ADMA (1<<6) -/* Controller can only DMA from 32-bit aligned addresses */ -#define SDHCI_QUIRK_32BIT_DMA_ADDR (1<<7) -/* Controller can only DMA chunk sizes that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_DMA_SIZE (1<<8) -/* Controller can only ADMA chunks that are a multiple of 32 bits */ -#define SDHCI_QUIRK_32BIT_ADMA_SIZE (1<<9) -/* Controller needs to be reset after each request to stay stable */ -#define SDHCI_QUIRK_RESET_AFTER_REQUEST (1<<10) -/* Controller needs voltage and power writes to happen separately */ -#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER (1<<11) -/* Controller provides an incorrect timeout value for transfers */ -#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12) -/* Controller has an issue with buffer bits for small transfers */ -#define SDHCI_QUIRK_BROKEN_SMALL_PIO (1<<13) -/* Controller does not provide transfer-complete interrupt when not busy */ -#define SDHCI_QUIRK_NO_BUSY_IRQ (1<<14) -/* Controller has unreliable card detection */ -#define SDHCI_QUIRK_BROKEN_CARD_DETECTION (1<<15) -/* Controller reports inverted write-protect state */ -#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16) -/* Controller does not like fast PIO transfers */ -#define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18) -/* Controller has to be forced to use block size of 2048 bytes */ -#define SDHCI_QUIRK_FORCE_BLK_SZ_2048 (1<<20) -/* Controller cannot do multi-block transfers */ -#define SDHCI_QUIRK_NO_MULTIBLOCK (1<<21) -/* Controller can only handle 1-bit data transfers */ -#define SDHCI_QUIRK_FORCE_1_BIT_DATA (1<<22) -/* Controller needs 10ms delay between applying power and clock */ -#define SDHCI_QUIRK_DELAY_AFTER_POWER (1<<23) -/* Controller uses SDCLK instead of TMCLK for data timeouts */ -#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK (1<<24) -/* Controller reports wrong base clock capability */ -#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN (1<<25) -/* Controller cannot support End Attribute in NOP ADMA descriptor */ -#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC (1<<26) -/* Controller is missing device caps. Use caps provided by host */ -#define SDHCI_QUIRK_MISSING_CAPS (1<<27) -/* Controller uses Auto CMD12 command to stop the transfer */ -#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 (1<<28) -/* Controller doesn't have HISPD bit field in HI-SPEED SD card */ -#define SDHCI_QUIRK_NO_HISPD_BIT (1<<29) -/* Controller treats ADMA descriptors with length 0000h incorrectly */ -#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC (1<<30) -/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */ -#define SDHCI_QUIRK_UNSTABLE_RO_DETECT (1<<31) - - unsigned int quirks2; /* More deviations from spec. */ - -#define SDHCI_QUIRK2_HOST_OFF_CARD_ON (1<<0) -#define SDHCI_QUIRK2_HOST_NO_CMD23 (1<<1) -/* The system physically doesn't support 1.8v, even if the host does */ -#define SDHCI_QUIRK2_NO_1_8_V (1<<2) -#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN (1<<3) -#define SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON (1<<4) -/* Controller has a non-standard host control register */ -#define SDHCI_QUIRK2_BROKEN_HOST_CONTROL (1<<5) -/* Controller does not support HS200 */ -#define SDHCI_QUIRK2_BROKEN_HS200 (1<<6) -/* Controller does not support DDR50 */ -#define SDHCI_QUIRK2_BROKEN_DDR50 (1<<7) -/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */ -#define SDHCI_QUIRK2_STOP_WITH_TC (1<<8) -/* Controller does not support 64-bit DMA */ -#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA (1<<9) -/* need clear transfer mode register before send cmd */ -#define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD (1<<10) -/* Capability register bit-63 indicates HS400 support */ -#define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 (1<<11) -/* forced tuned clock */ -#define SDHCI_QUIRK2_TUNING_WORK_AROUND (1<<12) -/* disable the block count for single block transactions */ -#define SDHCI_QUIRK2_SUPPORT_SINGLE (1<<13) -/* Controller broken with using ACMD23 */ -#define SDHCI_QUIRK2_ACMD23_BROKEN (1<<14) - - int irq; /* Device IRQ */ - void __iomem *ioaddr; /* Mapped address */ - - const struct sdhci_ops *ops; /* Low level hw interface */ - - /* Internal data */ - struct mmc_host *mmc; /* MMC structure */ - u64 dma_mask; /* custom DMA mask */ - -#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) - struct led_classdev led; /* LED control */ - char led_name[32]; -#endif - - spinlock_t lock; /* Mutex */ - - int flags; /* Host attributes */ -#define SDHCI_USE_SDMA (1<<0) /* Host is SDMA capable */ -#define SDHCI_USE_ADMA (1<<1) /* Host is ADMA capable */ -#define SDHCI_REQ_USE_DMA (1<<2) /* Use DMA for this req. */ -#define SDHCI_DEVICE_DEAD (1<<3) /* Device unresponsive */ -#define SDHCI_SDR50_NEEDS_TUNING (1<<4) /* SDR50 needs tuning */ -#define SDHCI_NEEDS_RETUNING (1<<5) /* Host needs retuning */ -#define SDHCI_AUTO_CMD12 (1<<6) /* Auto CMD12 support */ -#define SDHCI_AUTO_CMD23 (1<<7) /* Auto CMD23 support */ -#define SDHCI_PV_ENABLED (1<<8) /* Preset value enabled */ -#define SDHCI_SDIO_IRQ_ENABLED (1<<9) /* SDIO irq enabled */ -#define SDHCI_SDR104_NEEDS_TUNING (1<<10) /* SDR104/HS200 needs tuning */ -#define SDHCI_USING_RETUNING_TIMER (1<<11) /* Host is using a retuning timer for the card */ -#define SDHCI_USE_64_BIT_DMA (1<<12) /* Use 64-bit DMA */ -#define SDHCI_HS400_TUNING (1<<13) /* Tuning for HS400 */ - - unsigned int version; /* SDHCI spec. version */ - - unsigned int max_clk; /* Max possible freq (MHz) */ - unsigned int timeout_clk; /* Timeout freq (KHz) */ - unsigned int clk_mul; /* Clock Muliplier value */ - - unsigned int clock; /* Current clock (MHz) */ - u8 pwr; /* Current voltage */ - - bool runtime_suspended; /* Host is runtime suspended */ - bool bus_on; /* Bus power prevents runtime suspend */ - bool preset_enabled; /* Preset is enabled */ - - struct mmc_request *mrq; /* Current request */ - struct mmc_command *cmd; /* Current command */ - struct mmc_data *data; /* Current data request */ - unsigned int data_early:1; /* Data finished before cmd */ - unsigned int busy_handle:1; /* Handling the order of Busy-end */ - - struct sg_mapping_iter sg_miter; /* SG state for PIO */ - unsigned int blocks; /* remaining PIO blocks */ - - int sg_count; /* Mapped sg entries */ - - void *adma_table; /* ADMA descriptor table */ - void *align_buffer; /* Bounce buffer */ - - size_t adma_table_sz; /* ADMA descriptor table size */ - size_t align_buffer_sz; /* Bounce buffer size */ - - dma_addr_t adma_addr; /* Mapped ADMA descr. table */ - dma_addr_t align_addr; /* Mapped bounce buffer */ - - unsigned int desc_sz; /* ADMA descriptor size */ - unsigned int align_sz; /* ADMA alignment */ - unsigned int align_mask; /* ADMA alignment mask */ - - struct tasklet_struct finish_tasklet; /* Tasklet structures */ - - struct timer_list timer; /* Timer for timeouts */ - - u32 caps; /* Alternative CAPABILITY_0 */ - u32 caps1; /* Alternative CAPABILITY_1 */ - - unsigned int ocr_avail_sdio; /* OCR bit masks */ - unsigned int ocr_avail_sd; - unsigned int ocr_avail_mmc; - u32 ocr_mask; /* available voltages */ - - unsigned timing; /* Current timing */ - - u32 thread_isr; - - /* cached registers */ - u32 ier; - - wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */ - unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */ - - unsigned int tuning_count; /* Timer count for re-tuning */ - unsigned int tuning_mode; /* Re-tuning mode supported by host */ -#define SDHCI_TUNING_MODE_1 0 - struct timer_list tuning_timer; /* Timer for tuning */ - - struct sdhci_host_next next_data; - unsigned long private[0] ____cacheline_aligned; -}; -#endif /* LINUX_MMC_SDHCI_H */ -- cgit v1.2.3 From 03a6d291047da60d56514c28fa1314235bdf2037 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 4 Mar 2015 16:30:07 +0100 Subject: mmc: sdhci-spear: Remove exported header Move the member for card_int_gpio into the struct spear_sdhci. In this way we eliminate the last user of the struct sdhci_plat_data, which enables us to remove the exported header for sdhci-spear. Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci-spear.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 include/linux/mmc/sdhci-spear.h (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h deleted file mode 100644 index 8cc095a76cf8..000000000000 --- a/include/linux/mmc/sdhci-spear.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * include/linux/mmc/sdhci-spear.h - * - * SDHCI declarations specific to ST SPEAr platform - * - * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef LINUX_MMC_SDHCI_SPEAR_H -#define LINUX_MMC_SDHCI_SPEAR_H - -#include -/* - * struct sdhci_plat_data: spear sdhci platform data structure - * - * card_int_gpio: gpio pin used for card detection - */ -struct sdhci_plat_data { - int card_int_gpio; -}; - -/* This function is used to set platform_data field of pdev->dev */ -static inline void -sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data) -{ - pdev->dev.platform_data = data; -} - -#endif /* LINUX_MMC_SDHCI_SPEAR_H */ -- cgit v1.2.3 From 0345f93138b2224e0d7ce91fcffdb3dd23f364d7 Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Thu, 19 Mar 2015 12:31:25 -0700 Subject: net: socket: add support for async operations Add support for async operations. Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index fab4d0ddf4ed..c9852ef7e317 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -51,6 +51,7 @@ struct msghdr { void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; struct user_msghdr { -- cgit v1.2.3 From 42cb80a2353f42913ae78074ffa1f1b4a49e5436 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:19 -0700 Subject: inet: remove sk_listener parameter from syn_ack_timeout() It is not needed, and req->sk_listener points to the listener anyway. request_sock argument can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 439ff698000a..3dca24d3ac67 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -317,6 +317,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } -extern void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req); +extern void dccp_syn_ack_timeout(const struct request_sock *req); #endif /* _LINUX_DCCP_H */ -- cgit v1.2.3 From 85645bab57bfc6b0b43bb96a301c4ef83925c07d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:24 -0700 Subject: ipv4: dccp: handle ICMP messages on DCCP_NEW_SYN_RECV request sockets dccp_v4_err() can restrict lookups to ehash table, and not to listeners. Note this patch creates the infrastructure, but this means that ICMP messages for request sockets are ignored until complete conversion. New dccp_req_err() helper is exported so that we can use it in IPv6 in following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dca24d3ac67..221025423e6c 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -43,6 +43,7 @@ enum dccp_state { DCCP_CLOSING = TCP_CLOSING, DCCP_TIME_WAIT = TCP_TIME_WAIT, DCCP_CLOSED = TCP_CLOSE, + DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, DCCP_PARTOPEN = TCP_MAX_STATES, DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ DCCP_MAX_STATES @@ -57,6 +58,7 @@ enum { DCCPF_CLOSING = TCPF_CLOSING, DCCPF_TIME_WAIT = TCPF_TIME_WAIT, DCCPF_CLOSED = TCPF_CLOSE, + DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), }; -- cgit v1.2.3 From 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 23 Mar 2015 20:21:51 -0300 Subject: x86: kvm: Revert "remove sched notifier for cross-cpu migrations" The following point: 2. per-CPU pvclock time info is updated if the underlying CPU changes. Is not true anymore since "KVM: x86: update pvclock area conditionally, on cpu migration". Add task migration notification back. Problem noticed by Andy Lutomirski. Signed-off-by: Marcelo Tosatti CC: stable@kernel.org # 3.11+ --- include/linux/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..be98910cc1e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); +/* Notifier for when a task gets migrated to a new CPU */ +struct task_migration_notifier { + struct task_struct *task; + int from_cpu; + int to_cpu; +}; +extern void register_task_migration_notifier(struct notifier_block *n); + extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); -- cgit v1.2.3 From 2f921b5bb0511fb698681d8ef35c48be7a9116bf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 24 Mar 2015 11:51:39 +1030 Subject: lguest: suppress interrupts for single insn, not range. The last patch reduced our interrupt-suppression region to one address, so simplify the code somewhat. Also, remove the obsolete undefined instruction ranges and the comment which refers to lguest_guest.S instead of head_32.S. Signed-off-by: Rusty Russell --- include/linux/lguest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 9962c6bb1311..6db19f35f7c5 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -61,8 +61,8 @@ struct lguest_data { u32 tsc_khz; /* Fields initialized by the Guest at boot: */ - /* Instruction range to suppress interrupts even if enabled */ - unsigned long noirq_start, noirq_end; + /* Instruction to suppress interrupts even if enabled */ + unsigned long noirq_iret; /* Address above which page tables are all identical. */ unsigned long kernel_address; /* The vector to try to use for system calls (0x40 or 0x80). */ -- cgit v1.2.3 From de91b25c8011089f5dd99b9d24743db1f550ca4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:20 +1100 Subject: rhashtable: Eliminate unnecessary branch in rht_key_hashfn When rht_key_hashfn is called from rhashtable itself and params is equal to ht->p, there is no point in checking params.key_len and falling back to ht->p.key_len. For some reason gcc couldn't figure out that params is the same as ht->p. So let's help it by only checking params.key_len when it's a constant. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 89d102270570..3851952781d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -199,8 +199,12 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: - ht->p.key_len, + /* params must be equal to ht->p if it isn't constant. */ + unsigned key_len = __builtin_constant_p(params.key_len) ? + (params.key_len ?: ht->p.key_len) : + params.key_len; + + return rht_bucket_index(tbl, params.hashfn(key, key_len, tbl->hash_rnd)); } -- cgit v1.2.3 From 31ccde2dacea8375c3a7d6fffbf0060ee0d40214 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:21 +1100 Subject: rhashtable: Allow hashfn to be unset Since every current rhashtable user uses jhash as their hash function, the fact that jhash is an inline function causes each user to generate a copy of its code. This function provides a solution to this problem by allowing hashfn to be unset. In which case rhashtable will automatically set it to jhash. Furthermore, if the key length is a multiple of 4, we will switch over to jhash2. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3851952781d7..bc2488b98321 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @hashfn: Function to hash key + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ @@ -125,6 +126,7 @@ struct rhashtable_params { * struct rhashtable - Hash table handle * @tbl: Bucket table * @nelems: Number of elements in table + * @key_len: Key length for hashfn * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -134,6 +136,7 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; bool being_destroyed; + unsigned int key_len; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -199,13 +202,31 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { + unsigned hash; + /* params must be equal to ht->p if it isn't constant. */ - unsigned key_len = __builtin_constant_p(params.key_len) ? - (params.key_len ?: ht->p.key_len) : - params.key_len; + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + else if (params.key_len) { + unsigned key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, tbl->hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), + tbl->hash_rnd); + } else { + unsigned key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else + hash = jhash(key, key_len, tbl->hash_rnd); + } - return rht_bucket_index(tbl, params.hashfn(key, key_len, - tbl->hash_rnd)); + return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( -- cgit v1.2.3 From b824478b2145be78ac19e1cf44e2b9036c7a9608 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:26 +1100 Subject: rhashtable: Add multiple rehash support This patch adds the missing bits to allow multiple rehashes. The read-side as well as remove already handle this correctly. So it's only the rehasher and insertion that need modification to handle this. Note that this patch doesn't actually enable it so for now rehashing is still only performed by the worker thread. This patch also disables the explicit expand/shrink interface because the table is meant to expand and shrink automatically, and continuing to export these interfaces unnecessarily complicates the life of the rehasher since the rehash process is now composed of two parts. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index bc2488b98321..e8ffcdb5e239 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -308,9 +308,6 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -int rhashtable_expand(struct rhashtable *ht); -int rhashtable_shrink(struct rhashtable *ht); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); @@ -541,17 +538,22 @@ static inline int __rhashtable_insert_fast( rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); - /* Because we have already taken the bucket lock in tbl, - * if we find that future_tbl is not yet visible then - * that guarantees all other insertions of the same entry - * will also grab the bucket lock in tbl because until - * the rehash completes ht->tbl won't be changed. + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); -- cgit v1.2.3 From ccd57b1bd32460d27bbb9c599e795628a3c66983 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:28 +1100 Subject: rhashtable: Add immediate rehash during insertion This patch reintroduces immediate rehash during insertion. If we find during insertion that the table is full or the chain length exceeds a set limit (currently 16 but may be disabled with insecure_elasticity) then we will force an immediate rehash. The rehash will contain an expansion if the table utilisation exceeds 75%. If this rehash fails then the insertion will fail. Otherwise the insertion will be reattempted in the new hash table. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e8ffcdb5e239..f9ecf32bce55 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -103,6 +103,7 @@ struct rhashtable; * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker + * @insecure_elasticity: Set to true to disable chain length checks * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -116,6 +117,7 @@ struct rhashtable_params { unsigned int max_size; unsigned int min_size; u32 nulls_base; + bool insecure_elasticity; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -127,6 +129,7 @@ struct rhashtable_params { * @tbl: Bucket table * @nelems: Number of elements in table * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -137,6 +140,7 @@ struct rhashtable { atomic_t nelems; bool being_destroyed; unsigned int key_len; + unsigned int elasticity; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -266,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, tbl->size > ht->p.min_size; } +/** + * rht_grow_above_100 - returns true if nelems > table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_100(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) > tbl->size; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -307,6 +322,7 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); +int rhashtable_insert_rehash(struct rhashtable *ht); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); @@ -529,12 +545,14 @@ static inline int __rhashtable_insert_fast( .ht = ht, .key = key, }; - int err = -EEXIST; struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; + unsigned elasticity; unsigned hash; + int err; +restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); @@ -557,20 +575,34 @@ static inline int __rhashtable_insert_fast( new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); + if (err == -EAGAIN) + goto slow_path; goto out; } - if (!key) - goto skip_lookup; + if (unlikely(rht_grow_above_100(ht, tbl))) { +slow_path: + spin_unlock_bh(lock); + rcu_read_unlock(); + err = rhashtable_insert_rehash(ht); + if (err) + return err; + + goto restart; + } + err = -EEXIST; + elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { - if (unlikely(!(params.obj_cmpfn ? + if (key && + unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head))))) goto out; + if (!--elasticity) + goto slow_path; } -skip_lookup: err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); -- cgit v1.2.3 From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:00 +0100 Subject: ipv6: introduce secret_stable to ipv6_devconf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the procfs logic for the stable_address knob: The secret is formatted as an ipv6 address and will be stored per interface and per namespace. We track initialized flag and return EIO errors until the secret is set. We don't inherit the secret to newly created namespaces. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4d5169f5d7d1..82806c60aa42 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -53,6 +53,10 @@ struct ipv6_devconf { __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + struct ipv6_stable_secret { + bool initialized; + struct in6_addr secret; + } stable_secret; void *sysctl; }; -- cgit v1.2.3 From ba7c95ea3870fe7b847466d39a049ab6f156aa2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 09:53:17 +1100 Subject: rhashtable: Fix sleeping inside RCU critical section in walk_stop The commit 963ecbd41a1026d99ec7537c050867428c397b89 ("rhashtable: Fix use-after-free in rhashtable_walk_stop") fixed a real bug but created another one because we may end up sleeping inside an RCU critical section. This patch fixes it properly by replacing the mutex with a spin lock that specifically protects the walker lists. Reported-by: Sasha Levin Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f9ecf32bce55..d7be9cb0e91f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -133,6 +133,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -144,6 +145,7 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + spinlock_t lock; }; /** -- cgit v1.2.3 From 0117ec1970c5fa9c566045e7df8db76acc8f150e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 18:40:02 +0100 Subject: net: remove never used forwarding_accel_ops pointer from net_device Cc: John Fastabend Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ae69e7df867..08c4ab37189f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1342,7 +1342,6 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations - * @fwd_ops: Management operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1551,7 +1550,6 @@ struct net_device { #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; - const struct forwarding_accel_ops *fwd_ops; #ifdef CONFIG_NET_SWITCHDEV const struct swdev_ops *swdev_ops; #endif -- cgit v1.2.3 From 44486b48b066330e0ed0a66478cc49f5975ec6c1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 18 Mar 2015 18:38:26 +0200 Subject: wl12xx: use frequency instead of enumerations for pdata clocks Instead of defining an enumeration with the FW specific values for the different clock rates, use the actual frequency instead. Also add a boolean to specify whether the clock is XTAL or not. Change all board files to reflect this. Signed-off-by: Luciano Coelho [Eliad - small fixes, update board file changes] Signed-off-by: Eliad Peller Tested-by: Nikita Kiryanov Acked-by: Kalle Valo Acked-by: Sekhar Nori Signed-off-by: Tony Lindgren --- include/linux/wl12xx.h | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index 3876b67dbcbc..eea1e6dac4be 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -26,28 +26,6 @@ #include -/* Reference clock values */ -enum { - WL12XX_REFCLOCK_19 = 0, /* 19.2 MHz */ - WL12XX_REFCLOCK_26 = 1, /* 26 MHz */ - WL12XX_REFCLOCK_38 = 2, /* 38.4 MHz */ - WL12XX_REFCLOCK_52 = 3, /* 52 MHz */ - WL12XX_REFCLOCK_38_XTAL = 4, /* 38.4 MHz, XTAL */ - WL12XX_REFCLOCK_26_XTAL = 5, /* 26 MHz, XTAL */ -}; - -/* TCXO clock values */ -enum { - WL12XX_TCXOCLOCK_19_2 = 0, /* 19.2MHz */ - WL12XX_TCXOCLOCK_26 = 1, /* 26 MHz */ - WL12XX_TCXOCLOCK_38_4 = 2, /* 38.4MHz */ - WL12XX_TCXOCLOCK_52 = 3, /* 52 MHz */ - WL12XX_TCXOCLOCK_16_368 = 4, /* 16.368 MHz */ - WL12XX_TCXOCLOCK_32_736 = 5, /* 32.736 MHz */ - WL12XX_TCXOCLOCK_16_8 = 6, /* 16.8 MHz */ - WL12XX_TCXOCLOCK_33_6 = 7, /* 33.6 MHz */ -}; - struct wl1251_platform_data { int power_gpio; /* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */ @@ -58,8 +36,9 @@ struct wl1251_platform_data { struct wl12xx_platform_data { int irq; u32 irq_trigger; - int board_ref_clock; - int board_tcxo_clock; + bool ref_clock_xtal; /* specify whether the clock is XTAL or not */ + u32 ref_clock_freq; /* in Hertz */ + u32 tcxo_clock_freq; /* in Hertz, tcxo is always XTAL */ bool pwr_in_suspend; }; -- cgit v1.2.3 From 83c3a7d4ac7fdc29a64bf9a5467a36b4c72a1eed Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 18 Mar 2015 18:38:30 +0200 Subject: wlcore: remove wl12xx_platform_data Now that we have wlcore device-tree bindings in place (for both wl12xx and wl18xx), remove the legacy wl12xx_platform_data struct, and move its members into the platform device data (that is passed to wlcore) Davinci 850 is the only platform that still set the platform data in the legacy way (and doesn't have DT bindings), so remove the relevant code/Kconfig option from the board file (as suggested by Sekhar Nori) Since no one currently uses wlcore_spi, simply remove its platform data support (DT bindings will have to be added if someone actually needs it) Signed-off-by: Luciano Coelho Signed-off-by: Eliad Peller Tested-by: Nikita Kiryanov Acked-by: Kalle Valo Acked-by: Sekhar Nori Signed-off-by: Tony Lindgren --- include/linux/wl12xx.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h index eea1e6dac4be..95704cd4cfab 100644 --- a/include/linux/wl12xx.h +++ b/include/linux/wl12xx.h @@ -33,39 +33,14 @@ struct wl1251_platform_data { bool use_eeprom; }; -struct wl12xx_platform_data { - int irq; - u32 irq_trigger; - bool ref_clock_xtal; /* specify whether the clock is XTAL or not */ - u32 ref_clock_freq; /* in Hertz */ - u32 tcxo_clock_freq; /* in Hertz, tcxo is always XTAL */ - bool pwr_in_suspend; -}; - #ifdef CONFIG_WILINK_PLATFORM_DATA -int wl12xx_set_platform_data(const struct wl12xx_platform_data *data); - -struct wl12xx_platform_data *wl12xx_get_platform_data(void); - int wl1251_set_platform_data(const struct wl1251_platform_data *data); struct wl1251_platform_data *wl1251_get_platform_data(void); #else -static inline -int wl12xx_set_platform_data(const struct wl12xx_platform_data *data) -{ - return -ENOSYS; -} - -static inline -struct wl12xx_platform_data *wl12xx_get_platform_data(void) -{ - return ERR_PTR(-ENODATA); -} - static inline int wl1251_set_platform_data(const struct wl1251_platform_data *data) { -- cgit v1.2.3 From 3a02824396c1df1db422d067b0fdcac724f44dd6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Mar 2015 14:14:18 -0400 Subject: libata: remove ATA_FLAG_LOWTAG sata_sil24 for some reason pukes when tags are allocated round-robin which helps tag ordered controllers. To work around the issue, 72dd299d5039 ("libata: allow sata_sil24 to opt-out of tag ordered submission") introduced ATA_FLAG_LOWTAG which tells libata tag allocation to do lowest-first. However, with the recent switch to blk-mq tag allocation, the liata tag allocation code path is no longer used and the workaround is now implemented in the block layer and selected by setting scsi_host_template->tag_alloc_policy to BLK_TAG_ALLOC_FIFO. See 9269e23496dd ("libata: make sata_sil24 use fifo tag allocator"). This leaves ATA_FLAG_LOWTAG withoout any actual user. Remove it. Signed-off-by: Tejun Heo Cc: Shaohua Li Cc: Dan Williams --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa64ffe..ebe132a4873c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -231,7 +231,6 @@ enum { ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ - ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Tue, 24 Mar 2015 14:48:41 +0100 Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension If vlan offloading takes place then vlan header is removed from frame and its contents, both vlan_tci and vlan_proto, is available to user space via TPACKET interface. However, only vlan_tci can be used in BPF filters. This commit introduces a new BPF extension. It makes possible to load the value of vlan_proto (vlan TPID) to register A. Support for classic BPF and eBPF is being added, analogous to skb->protocol. Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Pirko Signed-off-by: Michal Sekletar Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9ee8c67ea249..fa11b3a367be 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); + BPF_ANCILLARY(VLAN_TPID); } /* Fallthrough. */ default: -- cgit v1.2.3 From 58be8a583d8d316448bafa5926414cfb83c02dec Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:16 +0100 Subject: rhashtable: Extend RCU read lock into rhashtable_insert_rehash() rhashtable_insert_rehash() requires RCU locks to be held in order to access ht->tbl and traverse to the last table. Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion") Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d7be9cb0e91f..5976ab59b88f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -585,8 +585,8 @@ restart: if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); - rcu_read_unlock(); err = rhashtable_insert_rehash(ht); + rcu_read_unlock(); if (err) return err; -- cgit v1.2.3 From 299e5c32a37a6bca8175db177117467bd1ce970a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:17 +0100 Subject: rhashtable: Use 'unsigned int' consistently Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5976ab59b88f..f89cda067cb9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - unsigned hash; + unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); else if (params.key_len) { - unsigned key_len = params.key_len; + unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn( hash = jhash2(key, key_len / sizeof(u32), tbl->hash_rnd); } else { - unsigned key_len = ht->p.key_len; + unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -512,7 +512,7 @@ static inline void *rhashtable_lookup_fast( }; const struct bucket_table *tbl; struct rhash_head *he; - unsigned hash; + unsigned int hash; rcu_read_lock(); @@ -550,8 +550,8 @@ static inline int __rhashtable_insert_fast( struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; - unsigned elasticity; - unsigned hash; + unsigned int elasticity; + unsigned int hash; int err; restart: @@ -718,7 +718,7 @@ static inline int __rhashtable_remove_fast( struct rhash_head __rcu **pprev; struct rhash_head *he; spinlock_t * lock; - unsigned hash; + unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); -- cgit v1.2.3 From ac833bddb591b9c7a0609f440f196375be184044 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:18 +0100 Subject: rhashtable: Mark internal/private inline functions as such Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f89cda067cb9..0e1f975ad101 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -539,6 +539,7 @@ restart: return NULL; } +/* Internal function, please use rhashtable_insert_fast() instead */ static inline int __rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) @@ -711,6 +712,7 @@ static inline int rhashtable_lookup_insert_key( return __rhashtable_insert_fast(ht, key, obj, params); } +/* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params) -- cgit v1.2.3 From b5e2c150ac914f28a28833b57397bec0b0a2bd5f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 20:42:19 +0000 Subject: rhashtable: Disable automatic shrinking by default Introduce a new bool automatic_shrinking to require the user to explicitly opt-in to automatic shrinking of tables. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0e1f975ad101..ae26c494e230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -2,7 +2,7 @@ * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015 Herbert Xu - * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * * Code partially derived from nft_hash @@ -104,6 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @insecure_elasticity: Set to true to disable chain length checks + * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -118,6 +119,7 @@ struct rhashtable_params { unsigned int min_size; u32 nulls_base; bool insecure_elasticity; + bool automatic_shrinking; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -784,7 +786,8 @@ static inline int rhashtable_remove_fast( goto out; atomic_dec(&ht->nelems); - if (rht_shrink_below_30(ht, tbl)) + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); out: -- cgit v1.2.3 From 6b6f302ceda7a052dab545d6c69abf5f0d4a6cab Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:20 +0100 Subject: rhashtable: Add rhashtable_free_and_destroy() rhashtable_destroy() variant which stops rehashes, iterates over the table and calls a callback to release resources. Avoids need for nft_hash to embed rhashtable internals and allows to get rid of the being_destroyed flag. It also saves a 2nd mutex lock upon destruction. Also fixes an RCU lockdep splash on nft set destruction due to calling rht_for_each_entry_safe() without holding bucket locks. Open code this loop as we need know that no mutations may occur in parallel. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae26c494e230..99f2e49a8a07 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -136,12 +136,10 @@ struct rhashtable_params { * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list - * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; - bool being_destroyed; unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; @@ -334,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg); void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ -- cgit v1.2.3 From dd46c787788d5bf5b974729d43e4c405814a4c7d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 25 Mar 2015 15:07:05 +1100 Subject: fs: Add support FALLOC_FL_INSERT_RANGE for fallocate FALLOC_FL_INSERT_RANGE command is the opposite command of FALLOC_FL_COLLAPSE_RANGE that is needed for someone who wants to add some data in the middle of file. FALLOC_FL_INSERT_RANGE will create space for writing new data within a file after shifting extents to right as given length. This command also has same limitations as FALLOC_FL_COLLAPSE_RANGE in that operations need to be filesystem block boundary aligned and cannot cross the current EOF. Signed-off-by: Namjae Jeon Signed-off-by: Ashish Sangwan Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- include/linux/falloc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 31591686ac2d..996111000a8c 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -21,4 +21,10 @@ struct space_resv { #define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) #define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) +#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | \ + FALLOC_FL_INSERT_RANGE) + #endif /* _FALLOC_H_ */ -- cgit v1.2.3 From 3a3e1c88362429ca3a6ef84d232e56b197294ce0 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 23 Feb 2015 15:57:32 +0200 Subject: ARM: OMAP2+: PRCM: add support for static clock memmap indices All clock provider related drivers will now register their iomaps with a static index. This makes it easier to split up the individual drivers to their own files in subsequent patches. Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 67844003493d..19895a3f48b7 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -221,6 +221,7 @@ struct ti_dt_clk { /* Static memmap indices */ enum { TI_CLKM_CM = 0, + TI_CLKM_CM2, TI_CLKM_PRM, TI_CLKM_SCRM, }; -- cgit v1.2.3 From bd735995308b553cc3c7f6a975aa284b270c7e2c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Feb 2015 15:44:54 +0100 Subject: misc: Add attribute groups Add groups field to struct miscdevice for passing the attribute groups at device creation. In this way, the driver can avoid the manual call of device_create_file() after the device registration, which is basically a racy operation, in addition to the reduction of manual device_remove_file() calls. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index ee80dd7d9f60..819077c32690 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -52,6 +52,7 @@ #define MISC_DYNAMIC_MINOR 255 struct device; +struct attribute_group; struct miscdevice { int minor; @@ -60,6 +61,7 @@ struct miscdevice { struct list_head list; struct device *parent; struct device *this_device; + const struct attribute_group **groups; const char *nodename; umode_t mode; }; -- cgit v1.2.3 From ba61af6f3e4766c76aec0b5e7f2bb8277e1acdd0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 12 Mar 2015 09:58:28 -0400 Subject: sysfs: Document struct attribute_group Document variables defined in struct attribute_group to ensure correct usage. Signed-off-by: Guenter Roeck Signed-off-by: Vivien Didelot Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index ddad16148bd6..99382c0df17e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -57,6 +57,21 @@ do { \ #define sysfs_attr_init(attr) do {} while (0) #endif +/** + * struct attribute_group - data structure used to declare an attribute group. + * @name: Optional: Attribute group name + * If specified, the attribute group will be created in + * a new subdirectory with this name. + * @is_visible: Optional: Function to return permissions associated with an + * attribute of the group. Will be called repeatedly for each + * attribute in the group. Only read/write permissions as well as + * SYSFS_PREALLOC are accepted. Must return 0 if an attribute is + * not visible. The returned value will replace static permissions + * defined in struct attribute or struct bin_attribute. + * @attrs: Pointer to NULL terminated list of attributes. + * @bin_attrs: Pointer to NULL terminated list of binary attributes. + * Either attrs or bin_attrs or both must be provided. + */ struct attribute_group { const char *name; umode_t (*is_visible)(struct kobject *, -- cgit v1.2.3 From e8a51e1b51ee5730ad3913f3962e3099a5e19359 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 17 Feb 2015 19:03:41 -0600 Subject: device: Add dev_of_node() accessor Suggested by Arnd Bergmann, this gives a practical accessor for the of_node field of struct device while instructing the compiler that it will be NULL if CONFIG_OF is not set. Signed-off-by: Benjamin Herrenschmidt Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 0eb8ee2dc6d1..f3f2c7e38060 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -916,6 +916,13 @@ static inline void device_lock_assert(struct device *dev) lockdep_assert_held(&dev->mutex); } +static inline struct device_node *dev_of_node(struct device *dev) +{ + if (!IS_ENABLED(CONFIG_OF)) + return NULL; + return dev->of_node; +} + void driver_init(void); /* -- cgit v1.2.3 From 44caf2f37f009b2affa743073fa935826b6ab2fd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 13 Feb 2015 14:25:24 +0000 Subject: iommu/vt-d: kill bogus ecap_niotlb_iunits() As far back as I can see (which right now is a draft of the v1.2 spec dating from September 2008), bits 24-31 of the Extended Capability Register have already been reserved. I have no idea why anyone ever thought there would be multiple sets of IOTLB registers, but we've never supported them and all we do is make sure we map enough MMIO space for them. Kill it dead. Those bits do actually have a different meaning now. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a65208a8fe18..ee24ada20428 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,10 +115,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ -#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) \ - (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) #define ecap_qis(e) ((e) & 0x2) #define ecap_pass_through(e) ((e >> 6) & 0x1) -- cgit v1.2.3 From 4423f5e7d28c26af31df711c5c21eeacfac737b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 25 Mar 2015 15:43:39 +0000 Subject: iommu/vt-d: Add new extended capabilities from v2.3 VT-d specification Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ee24ada20428..796ef9645827 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,6 +115,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pss(e) ((e >> 35) & 0x1f) +#define ecap_eafs(e) ((e >> 34) & 0x1) +#define ecap_nwfs(e) ((e >> 33) & 0x1) +#define ecap_srs(e) ((e >> 31) & 0x1) +#define ecap_ers(e) ((e >> 30) & 0x1) +#define ecap_prs(e) ((e >> 29) & 0x1) +#define ecap_pasid(e) ((e >> 28) & 0x1) +#define ecap_dis(e) ((e >> 27) & 0x1) +#define ecap_nest(e) ((e >> 26) & 0x1) +#define ecap_mts(e) ((e >> 25) & 0x1) +#define ecap_ecs(e) ((e >> 24) & 0x1) #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) #define ecap_coherent(e) ((e) & 0x1) @@ -178,6 +189,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_IRES (((u32)1) << 25) #define DMA_GSTS_CFIS (((u32)1) << 23) +/* DMA_RTADDR_REG */ +#define DMA_RTADDR_RTT (((u64)1) << 11) + /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -- cgit v1.2.3 From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:45 +0000 Subject: rhashtable: provide len to obj_hashfn nftables sets will be converted to use so called setextensions, moving the key to a non-fixed position. To hash it, the obj_hashfn must be used, however it so far doesn't receive the length parameter. Pass the key length to obj_hashfn() and convert existing users. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/rhashtable.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a07..e23d242d1230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } -- cgit v1.2.3 From 2be608561abfcceda4b35b71a0c1ec5088bb39b9 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Thu, 12 Mar 2015 19:11:13 +0900 Subject: phy: exynos5-usbdrd: Add to support for Exynos5433 SoC This patch adds driver data to support for Exynos5433 SoC. The Exynos5433 has one USB3.0 Host and USB3.0 DRD(Dual Role Device). Exynos5433 is simplar to Eyxnos7 but Exynos5433 have one more USB3.0 Host controller. Signed-off-by: Jaewon Kim Signed-off-by: Kishon Vijay Abraham I --- include/linux/mfd/syscon/exynos5-pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index 00ef24bf6ede..9352adc95de6 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -36,6 +36,9 @@ #define EXYNOS5420_MTCADC_PHY_CONTROL (0x724) #define EXYNOS5420_DPTX_PHY_CONTROL (0x728) +/* Exynos5433 specific register definitions */ +#define EXYNOS5433_USBHOST30_PHY_CONTROL (0x728) + #define EXYNOS5_PHY_ENABLE BIT(0) #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) -- cgit v1.2.3 From e2e40f2c1ed433c5e224525c8c862fd32e5d3df2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Feb 2015 08:58:50 -0800 Subject: fs: move struct kiocb to fs.h struct kiocb now is a generic I/O container, so move it to fs.h. Also do a #include diet for aio.h while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/aio.h | 31 +------------------------------ include/linux/fs.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 5c40b61285ac..9eb42dbc5582 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -1,52 +1,23 @@ #ifndef __LINUX__AIO_H #define __LINUX__AIO_H -#include -#include #include -#include -#include - -#include struct kioctx; struct kiocb; +struct mm_struct; #define KIOCB_KEY 0 typedef int (kiocb_cancel_fn)(struct kiocb *); -#define IOCB_EVENTFD (1 << 0) - -struct kiocb { - struct file *ki_filp; - loff_t ki_pos; - void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); - void *private; - int ki_flags; -}; - -static inline bool is_sync_kiocb(struct kiocb *kiocb) -{ - return kiocb->ki_complete == NULL; -} - -static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) -{ - *kiocb = (struct kiocb) { - .ki_filp = filp, - }; -} - /* prototypes */ #ifdef CONFIG_AIO -struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } static inline long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user * __user *iocbpp, diff --git a/include/linux/fs.h b/include/linux/fs.h index 447932aed1e1..48c1472bde4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -314,6 +314,28 @@ struct page; struct address_space; struct writeback_control; +#define IOCB_EVENTFD (1 << 0) + +struct kiocb { + struct file *ki_filp; + loff_t ki_pos; + void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void *private; + int ki_flags; +}; + +static inline bool is_sync_kiocb(struct kiocb *kiocb) +{ + return kiocb->ki_complete == NULL; +} + +static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) +{ + *kiocb = (struct kiocb) { + .ki_filp = filp, + }; +} + /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet -- cgit v1.2.3 From 2f8a467a11aeec61f5077cd337b4efe74847e1d3 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 20 Mar 2015 16:28:05 +0800 Subject: usb: otg-fsm: move 2 otg fsm timers definition to otg_fsm_timer B_DATA_PLS(data-line pulse time) and B_SSEND_SRP(session end to SRP init) are also from OTG&EH 2.0 Specification and they are not chipidea specific. Signed-off-by: Li Jun Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg-fsm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index b6ba1bfb86f2..f728f1854829 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -53,6 +53,8 @@ enum otg_fsm_timer { B_SE0_SRP, B_SRP_FAIL, A_WAIT_ENUM, + B_DATA_PLS, + B_SSEND_SRP, NUM_OTG_FSM_TIMERS, }; -- cgit v1.2.3 From dece45855a8b0d1dcf48eb01d0822070ded6a4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Perrochaud?= Date: Mon, 9 Mar 2015 11:12:04 +0100 Subject: NFC: nxp-nci: Add support for NXP NCI chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for NXP NCI NFC controllers such as the NPC100 or PN7150 families. Signed-off-by: Clément Perrochaud Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nxp-nci.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/platform_data/nxp-nci.h (limited to 'include/linux') diff --git a/include/linux/platform_data/nxp-nci.h b/include/linux/platform_data/nxp-nci.h new file mode 100644 index 000000000000..d6ed28679bb2 --- /dev/null +++ b/include/linux/platform_data/nxp-nci.h @@ -0,0 +1,27 @@ +/* + * Generic platform data for the NXP NCI NFC chips. + * + * Copyright (C) 2014 NXP Semiconductors All rights reserved. + * + * Authors: Clément Perrochaud + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _NXP_NCI_H_ +#define _NXP_NCI_H_ + +struct nxp_nci_nfc_platform_data { + unsigned int gpio_en; + unsigned int gpio_fw; + unsigned int irq; +}; + +#endif /* _NXP_NCI_H_ */ -- cgit v1.2.3 From 959801fef94b7ee66ea2c713229637a7e1770890 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 24 Feb 2015 14:25:00 -0500 Subject: serial: core: Add minor field to uart_port UART drivers that share ttyS namespace cannot trivially compute the ttyS index from the port->line value since the minor_start may be offset from minor 64. Further, to do so requires a pointer to the uart driver since there is no back pointer from uart_port to uart_driver. Rather than have UART drivers computing the minor value by themselves, encapsulate within the serial core at port registration time. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index b0148e7bcbfa..980170e5a982 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -235,6 +235,7 @@ struct uart_port { const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ + unsigned int minor; resource_size_t mapbase; /* for ioremap */ struct device *dev; /* parent device */ unsigned char hub6; /* this should be in the 8250 driver */ -- cgit v1.2.3 From 828aef376d7a129547bc4ebb949965040177e3da Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 24 Mar 2015 14:02:46 +0000 Subject: ACPI / processor: Introduce phys_cpuid_t for CPU hardware ID CPU hardware ID (phys_id) is defined as u32 in structure acpi_processor, but phys_id is used as int in acpi processor driver, so it will lead to some inconsistence for the drivers. Furthermore, to cater for ACPI arch ports that implement 64 bits CPU ids a generic CPU physical id type is required. So introduce typedef u32 phys_cpuid_t in a common file, and introduce a macro PHYS_CPUID_INVALID as (phys_cpuid_t)(-1) if it's not defined by other archs, this will solve the inconsistence in acpi processor driver, and will prepare for the ACPI on ARM64 for the 64 bit CPU hardware ID in the following patch. CC: Rafael J Wysocki Suggested-by: Lorenzo Pieralisi Reviewed-by: Grant Likely Acked-by: Sudeep Holla Acked-by: Lorenzo Pieralisi Acked-by: Rafael J. Wysocki Signed-off-by: Catalin Marinas [hj: reworked cpu physid map return codes] Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- include/linux/acpi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 24c7aa8b1d20..6ec33c595aea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -146,9 +146,14 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); void acpi_numa_arch_fixup(void); +#ifndef PHYS_CPUID_INVALID +typedef u32 phys_cpuid_t; +#define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) +#endif + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -- cgit v1.2.3 From fbe61ec71ac975279cd47b6c299d5e33f63aac4e Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 24 Mar 2015 14:02:48 +0000 Subject: ARM64 / ACPI: Introduce ACPI_IRQ_MODEL_GIC and register device's gsi Introduce ACPI_IRQ_MODEL_GIC which is needed for ARM64 as GIC is used, and then register device's gsi with the core IRQ subsystem. acpi_register_gsi() is similar to DT based irq_of_parse_and_map(), since gsi is unique in the system, so use hwirq number directly for the mapping. We are going to implement stacked domains when GICv2m, GICv3, ITS support are added. CC: Marc Zyngier Originally-by: Amit Daniel Kachhap Tested-by: Suravee Suthikulpanit Tested-by: Yijing Wang Tested-by: Mark Langsdorf Tested-by: Jon Masters Tested-by: Timur Tabi Tested-by: Robert Richter Acked-by: Robert Richter Acked-by: Rafael J. Wysocki Reviewed-by: Grant Likely Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6ec33c595aea..de4e86f89c83 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -73,6 +73,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_IOAPIC, ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, + ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_COUNT }; -- cgit v1.2.3 From d60fc3892c4de4a25658786f941690462c5a5bab Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Tue, 24 Mar 2015 14:02:49 +0000 Subject: irqchip: Add GICv2 specific ACPI boot support ACPI kernel uses MADT table for proper GIC initialization. It needs to parse GIC related subtables, collect CPU interface and distributor addresses and call driver initialization function (which is hardware abstraction agnostic). In a similar way, FDT initialize GICv1/2. NOTE: This commit allow to initialize GICv1/2 basic functionality. While now simple GICv2 init call is used, any further GIC features require generic infrastructure for proper ACPI irqchip initialization. That mechanism and stacked irqdomains to support GICv2 MSI/virtualization extension, GICv3/4 and its ITS are considered as next steps. CC: Jason Cooper CC: Marc Zyngier CC: Thomas Gleixner Tested-by: Suravee Suthikulpanit Tested-by: Yijing Wang Tested-by: Mark Langsdorf Tested-by: Jon Masters Tested-by: Timur Tabi Tested-by: Robert Richter Acked-by: Robert Richter Acked-by: Marc Zyngier Acked-by: Jason Cooper Reviewed-by: Grant Likely Signed-off-by: Tomasz Nowicki Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- include/linux/acpi_irq.h | 10 ++++++++++ include/linux/irqchip/arm-gic-acpi.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/linux/acpi_irq.h create mode 100644 include/linux/irqchip/arm-gic-acpi.h (limited to 'include/linux') diff --git a/include/linux/acpi_irq.h b/include/linux/acpi_irq.h new file mode 100644 index 000000000000..f10c87265855 --- /dev/null +++ b/include/linux/acpi_irq.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_ACPI_IRQ_H +#define _LINUX_ACPI_IRQ_H + +#include + +#ifndef acpi_irq_init +static inline void acpi_irq_init(void) { } +#endif + +#endif /* _LINUX_ACPI_IRQ_H */ diff --git a/include/linux/irqchip/arm-gic-acpi.h b/include/linux/irqchip/arm-gic-acpi.h new file mode 100644 index 000000000000..de3419ed3937 --- /dev/null +++ b/include/linux/irqchip/arm-gic-acpi.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014, Linaro Ltd. + * Author: Tomasz Nowicki + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARM_GIC_ACPI_H_ +#define ARM_GIC_ACPI_H_ + +#ifdef CONFIG_ACPI + +/* + * Hard code here, we can not get memory size from MADT (but FDT does), + * Actually no need to do that, because this size can be inferred + * from GIC spec. + */ +#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) +#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) + +struct acpi_table_header; + +int gic_v2_acpi_init(struct acpi_table_header *table); +void acpi_gic_init(void); +#else +static inline void acpi_gic_init(void) { } +#endif + +#endif /* ARM_GIC_ACPI_H_ */ -- cgit v1.2.3 From b09ca1ecf6d499d5a33f978c905d2fbcc79b55d9 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 24 Mar 2015 14:02:50 +0000 Subject: clocksource / arch_timer: Parse GTDT to initialize arch timer Using the information presented by GTDT (Generic Timer Description Table) to initialize the arch timer (not memory-mapped). CC: Daniel Lezcano CC: Thomas Gleixner Originally-by: Amit Daniel Kachhap Tested-by: Suravee Suthikulpanit Tested-by: Yijing Wang Tested-by: Mark Langsdorf Tested-by: Jon Masters Tested-by: Timur Tabi Tested-by: Robert Richter Acked-by: Robert Richter Acked-by: Daniel Lezcano Reviewed-by: Grant Likely Signed-off-by: Hanjun Guo Signed-off-by: Will Deacon --- include/linux/clocksource.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9c78d15d33e4..2b2e1f80c519 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -244,4 +244,10 @@ extern void clocksource_of_init(void); static inline void clocksource_of_init(void) {} #endif +#ifdef CONFIG_ACPI +void acpi_generic_timer_init(void); +#else +static inline void acpi_generic_timer_init(void) { } +#endif + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From c7cef0a84912cab3c9df8949b034e4aa62982ec9 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 9 Mar 2015 16:27:12 -0400 Subject: console: Add extensible console matching Add match() method to struct console which allows the console to perform console command line matching instead of (or in addition to) default console matching (ie., by fixed name and index). The match() method returns 0 to indicate a successful match; normal console matching occurs if no match() method is defined or the match() method returns non-zero. The match() method is expected to set the console index if required. Re-implement earlycon-to-console-handoff with direct matching of "console=uart|uart8250,..." to the 8250 ttyS console. Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 3 +-- include/linux/serial_8250.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 7571a16bd653..9f50fb413c11 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -123,7 +123,7 @@ struct console { struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); int (*setup)(struct console *, char *); - int (*early_setup)(void); + int (*match)(struct console *, char *name, int idx, char *options); short flags; short index; int cflag; @@ -141,7 +141,6 @@ extern int console_set_on_cmdline; extern struct console *early_console; extern int add_preferred_console(char *name, int idx, char *options); -extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); extern int unregister_console(struct console *); extern struct console *console_drivers; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index a8efa235b7c1..f26ae7fa30ae 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -118,8 +118,6 @@ void serial8250_resume_port(int line); extern int early_serial_setup(struct uart_port *port); -extern int serial8250_find_port(struct uart_port *p); -extern int serial8250_find_port_for_earlycon(void); extern unsigned int serial8250_early_in(struct uart_port *port, int offset); extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int setup_early_serial8250_console(char *cmdline); -- cgit v1.2.3 From 470ca0de69feaba5df215ad804cec1859883a5ed Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 9 Mar 2015 16:27:21 -0400 Subject: serial: earlycon: Enable earlycon without command line param Earlycon matching can only be triggered if 'earlycon=...' has been specified on the kernel command line. To workaround this limitation requires tight coupling between arches and specific serial drivers in order to start an earlycon. Devicetree avoids this limitation with a link table that contains the required data to match earlycons. Mirror this approach for earlycon match by name. Re-purpose EARLYCON_DECLARE to generate a table entry which associates name with setup() function. Re-purpose setup_earlycon() to scan this table for an earlycon match, which is registered if found. Declare one "earlycon" early_param, which calls setup_earlycon(). This design allows setup_earlycon() to be called directly with a param string (as if 'earlycon=...' had been set on the command line). Re-registration (either directly or by early_param) is prevented. Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 980170e5a982..8aeec4913a9c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -337,18 +337,21 @@ struct earlycon_device { char options[16]; /* e.g., 115200n8 */ unsigned int baud; }; -int setup_earlycon(char *buf, const char *match, - int (*setup)(struct earlycon_device *, const char *)); +struct earlycon_id { + char name[16]; + int (*setup)(struct earlycon_device *, const char *options); +}; + +extern int setup_earlycon(char *buf); extern int of_setup_earlycon(unsigned long addr, int (*setup)(struct earlycon_device *, const char *)); -#define EARLYCON_DECLARE(name, func) \ -static int __init name ## _setup_earlycon(char *buf) \ -{ \ - return setup_earlycon(buf, __stringify(name), func); \ -} \ -early_param("earlycon", name ## _setup_earlycon); +#define EARLYCON_DECLARE(_name, func) \ + static const struct earlycon_id __earlycon_##_name \ + __used __section(__earlycon_table) \ + = { .name = __stringify(_name), \ + .setup = func } #define OF_EARLYCON_DECLARE(name, compat, fn) \ _OF_DECLARE(earlycon, name, compat, fn, void *) -- cgit v1.2.3 From df519e7bd33cf56d8a5ce357dfb94248d427b688 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 9 Mar 2015 16:27:22 -0400 Subject: serial: 8250_early: Remove setup_early_serial8250_console() setup_earlycon() will now match and register the desired earlycon from the param string (as if 'earlycon=...' had been set on the command line). Use setup_earlycon() from existing arch call sites which start an earlycon directly. Acked-by: Rob Herring Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index f26ae7fa30ae..ca9f87beac63 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -120,7 +120,6 @@ extern int early_serial_setup(struct uart_port *port); extern unsigned int serial8250_early_in(struct uart_port *port, int offset); extern void serial8250_early_out(struct uart_port *port, int offset, int value); -extern int setup_early_serial8250_console(char *cmdline); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); extern int serial8250_do_startup(struct uart_port *port); -- cgit v1.2.3 From 01218bf14ee60d4a2d6c667ebdbba3ae9a7a1d66 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Mar 2015 09:47:55 -0700 Subject: of: Explicitly include linux/types.h in of_graph.h In current -next of_graph.h fails to build due to it relying on linux/types.h without explicitly including it: ../include/linux/of_graph.h:43:71: error: unknown type name 'u32' caused by bfe446e37c4e (of: Add of_graph_get_port_by_id function). Add an explicit inclusion to fix this. Signed-off-by: Mark Brown Acked-by: Laurent Pinchart Acked-by: Philipp Zabel Signed-off-by: Rob Herring --- include/linux/of_graph.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 3c1c95a39e0c..7bc92e050608 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -14,6 +14,8 @@ #ifndef __LINUX_OF_GRAPH_H #define __LINUX_OF_GRAPH_H +#include + /** * struct of_endpoint - the OF graph endpoint data structure * @port: identifier (value of reg property) of a port this endpoint belongs to -- cgit v1.2.3 From a4416cd1ac7b48988f0f41a17769d65c71ffc504 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 24 Feb 2015 14:25:07 -0500 Subject: serial: 8250: Separate legacy irq handling from core port operations Prepare for 8250 split; decouple irq setup/teardown and handler from core port operations. Introduce setup_irq() and release_irq() 8250 driver methods; the 8250 core will use these methods to install and remove irq handling for the given 8250 port. Refactor irq chain linking/unlinking from 8250 core into univ8250_setup_irq()/univ8250_release_irq() for the universal 8250 driver. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index ca9f87beac63..50735a9ad598 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -60,6 +60,20 @@ enum { }; struct uart_8250_dma; +struct uart_8250_port; + +/** + * 8250 core driver operations + * + * @setup_irq() Setup irq handling. The universal 8250 driver links this + * port to the irq chain. Other drivers may @request_irq(). + * @release_irq() Undo irq handling. The universal 8250 driver unlinks + * the port from the irq chain. + */ +struct uart_8250_ops { + int (*setup_irq)(struct uart_8250_port *); + void (*release_irq)(struct uart_8250_port *); +}; /* * This should be used by drivers which want to register @@ -100,6 +114,7 @@ struct uart_8250_port { unsigned char msr_saved_flags; struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; /* 8250 specific callbacks */ int (*dl_read)(struct uart_8250_port *); -- cgit v1.2.3 From 403753937020549e4bb0d8ef6e915f00a338a096 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 24 Feb 2015 14:25:14 -0500 Subject: serial: 8250: Decouple RSA probe Prepare for 8250 split; separate RSA probe and resource management from base port operations. Override base port operations for the config_port(), request_port() and release_port() methods to implement the optional RSA probe and resource management only in the universal/legacy 8250 driver. Introduce 'probe' flags for 8250 ports, which allows drivers higher up the driver stack to enable optional probes. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 50735a9ad598..78097e7a330a 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -102,6 +102,8 @@ struct uart_8250_port { unsigned char canary; /* non-zero during system sleep * if no_console_suspend */ + unsigned char probe; +#define UART_PROBE_RSA (1 << 0) /* * Some bits in registers are cleared on a read, so they must -- cgit v1.2.3 From 0fea53e255eaa889fb6cf8e10d11fbea2921eac8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 13 Mar 2015 11:09:36 -0700 Subject: tty: serial: Remove orphaned serial driver This driver is orphaned now that mach-msm has been removed. Delete it. Cc: Greg Kroah-Hartman Cc: David Brown Cc: Bryan Huntsman Cc: Daniel Walker Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/msm_serial_hs.h | 49 ----------------------------- 1 file changed, 49 deletions(-) delete mode 100644 include/linux/platform_data/msm_serial_hs.h (limited to 'include/linux') diff --git a/include/linux/platform_data/msm_serial_hs.h b/include/linux/platform_data/msm_serial_hs.h deleted file mode 100644 index 98a2046f8b31..000000000000 --- a/include/linux/platform_data/msm_serial_hs.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2008 Google, Inc. - * Author: Nick Pelly - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __ASM_ARCH_MSM_SERIAL_HS_H -#define __ASM_ARCH_MSM_SERIAL_HS_H - -#include - -/* API to request the uart clock off or on for low power management - * Clients should call request_clock_off() when no uart data is expected, - * and must call request_clock_on() before any further uart data can be - * received. */ -extern void msm_hs_request_clock_off(struct uart_port *uport); -extern void msm_hs_request_clock_on(struct uart_port *uport); - -/** - * struct msm_serial_hs_platform_data - * @rx_wakeup_irq: Rx activity irq - * @rx_to_inject: extra character to be inserted to Rx tty on wakeup - * @inject_rx: 1 = insert rx_to_inject. 0 = do not insert extra character - * @exit_lpm_cb: function called before every Tx transaction - * - * This is an optional structure required for UART Rx GPIO IRQ based - * wakeup from low power state. UART wakeup can be triggered by RX activity - * (using a wakeup GPIO on the UART RX pin). This should only be used if - * there is not a wakeup GPIO on the UART CTS, and the first RX byte is - * known (eg., with the Bluetooth Texas Instruments HCILL protocol), - * since the first RX byte will always be lost. RTS will be asserted even - * while the UART is clocked off in this mode of operation. - */ -struct msm_serial_hs_platform_data { - int rx_wakeup_irq; - unsigned char inject_rx_on_wakeup; - char rx_to_inject; - void (*exit_lpm_cb)(struct uart_port *); -}; - -#endif -- cgit v1.2.3 From e32edf4fd0fa4897e12ca66118ab67bf257e16e4 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Thu, 26 Mar 2015 14:39:28 +0000 Subject: KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks. This is needed in e.g. ARM vGIC emulation, where the MMIO handling depends on the VCPU that does the access. Signed-off-by: Nikolay Nikolaev Signed-off-by: Andre Przywara Acked-by: Paolo Bonzini Acked-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ae9c72012004..9605e46fce0b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -165,12 +165,12 @@ enum kvm_bus { KVM_NR_BUSES }; -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie); -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, - void *val); +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie); +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, -- cgit v1.2.3 From ee97d0e3f06498487671c23cad4230bf9aa5fd88 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sun, 8 Mar 2015 14:30:04 +0000 Subject: serial: 8250: allow specifying iomem size in addition to address This adds a mapsize field to struct uart_port to be used in conjunction with mapbase. If set, it overrides whatever value serial8250_port_size() would otherwise report. Signed-off-by: Mans Rullgard Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8aeec4913a9c..34de16840152 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -237,6 +237,7 @@ struct uart_port { unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ + resource_size_t mapsize; struct device *dev; /* parent device */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; -- cgit v1.2.3 From 911a88829725572820dad9a168e735c606a2fdcb Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Mon, 9 Mar 2015 14:29:04 +0000 Subject: memory: jz4780-nemc: driver for the NEMC on JZ4780 SoCs Add a driver for the NAND/External Memory Controller (NEMC) on JZ4780 and later SoCs. The primary function of this driver is to configure parameters, such as timings, for external memory devices using data supplied in the device tree. Devices connected to the NEMC are represented in the DT as children of the NEMC node, the driver uses optional properties specified in these child nodes to configure the parameters of each bank. Signed-off-by: Alex Smith Signed-off-by: Zubair Lutfullah Kakakhel Signed-off-by: Greg Kroah-Hartman --- include/linux/jz4780-nemc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 include/linux/jz4780-nemc.h (limited to 'include/linux') diff --git a/include/linux/jz4780-nemc.h b/include/linux/jz4780-nemc.h new file mode 100644 index 000000000000..e7f1cc7a2284 --- /dev/null +++ b/include/linux/jz4780-nemc.h @@ -0,0 +1,43 @@ +/* + * JZ4780 NAND/external memory controller (NEMC) + * + * Copyright (c) 2015 Imagination Technologies + * Author: Alex Smith + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_JZ4780_NEMC_H__ +#define __LINUX_JZ4780_NEMC_H__ + +#include + +struct device; + +/* + * Number of NEMC banks. Note that there are actually 6, but they are numbered + * from 1. + */ +#define JZ4780_NEMC_NUM_BANKS 7 + +/** + * enum jz4780_nemc_bank_type - device types which can be connected to a bank + * @JZ4780_NEMC_BANK_SRAM: SRAM + * @JZ4780_NEMC_BANK_NAND: NAND + */ +enum jz4780_nemc_bank_type { + JZ4780_NEMC_BANK_SRAM, + JZ4780_NEMC_BANK_NAND, +}; + +extern unsigned int jz4780_nemc_num_banks(struct device *dev); + +extern void jz4780_nemc_set_type(struct device *dev, unsigned int bank, + enum jz4780_nemc_bank_type type); +extern void jz4780_nemc_assert(struct device *dev, unsigned int bank, + bool assert); + +#endif /* __LINUX_JZ4780_NEMC_H__ */ -- cgit v1.2.3 From 36ee28e45df50c2c8624b978335516e42d84ae1f Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 27 Feb 2015 16:54:04 +0100 Subject: sched: Add sched_avg::utilization_avg_contrib Add new statistics which reflect the average time a task is running on the CPU and the sum of these running time of the tasks on a runqueue. The latter is named utilization_load_avg. This patch is based on the usage metric that was proposed in the 1st versions of the per-entity load tracking patchset by Paul Turner but that has be removed afterwards. This version differs from the original one in the sense that it's not linked to task_group. The rq's utilization_load_avg will be used to check if a rq is overloaded or not instead of trying to compute how many tasks a group of CPUs can handle. Rename runnable_avg_period into avg_period as it is now used with both runnable_avg_sum and running_avg_sum. Add some descriptions of the variables to explain their differences. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Acked-by: Morten Rasmussen Cc: Paul Turner Cc: Ben Segall Cc: Ben Segall Cc: Morten.Rasmussen@arm.com Cc: Paul Turner Cc: dietmar.eggemann@arm.com Cc: efault@gmx.de Cc: kamalesh@linux.vnet.ibm.com Cc: linaro-kernel@lists.linaro.org Cc: nicolas.pitre@linaro.org Cc: preeti@linux.vnet.ibm.com Cc: riel@redhat.com Link: http://lkml.kernel.org/r/1425052454-25797-2-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..fdca05c5f812 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1115,15 +1115,28 @@ struct load_weight { }; struct sched_avg { + u64 last_runnable_update; + s64 decay_count; + /* + * utilization_avg_contrib describes the amount of time that a + * sched_entity is running on a CPU. It is based on running_avg_sum + * and is scaled in the range [0..SCHED_LOAD_SCALE]. + * load_avg_contrib described the amount of time that a sched_entity + * is runnable on a rq. It is based on both runnable_avg_sum and the + * weight of the task. + */ + unsigned long load_avg_contrib, utilization_avg_contrib; /* * These sums represent an infinite geometric series and so are bound * above by 1024/(1-y). Thus we only need a u32 to store them for all * choices of y < 1-2^(-32)*1024. + * running_avg_sum reflects the time that the sched_entity is + * effectively running on the CPU. + * runnable_avg_sum represents the amount of time a sched_entity is on + * a runqueue which includes the running time that is monitored by + * running_avg_sum. */ - u32 runnable_avg_sum, runnable_avg_period; - u64 last_runnable_update; - s64 decay_count; - unsigned long load_avg_contrib; + u32 runnable_avg_sum, avg_period, running_avg_sum; }; #ifdef CONFIG_SCHEDSTATS -- cgit v1.2.3 From 7bd3e239d6c6d1cad276e8f130b386df4234dcd7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Mar 2015 17:45:37 +0100 Subject: locking: Remove atomicy checks from {READ,WRITE}_ONCE The fact that volatile allows for atomic load/stores is a special case not a requirement for {READ,WRITE}_ONCE(). Their primary purpose is to force the compiler to emit load/stores _once_. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Christian Borntraeger Acked-by: Will Deacon Acked-by: Linus Torvalds Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul McKenney Cc: Stephen Rothwell Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1b45e4a0519b..0e41ca0e5927 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #include -static __always_inline void data_access_exceeds_word_size(void) -#ifdef __compiletime_warning -__compiletime_warning("data access exceeds word size and won't be atomic") -#endif -; - -static __always_inline void data_access_exceeds_word_size(void) -{ -} - static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { switch (size) { case 1: *(__u8 *)res = *(volatile __u8 *)p; break; case 2: *(__u16 *)res = *(volatile __u16 *)p; break; case 4: *(__u32 *)res = *(volatile __u32 *)p; break; -#ifdef CONFIG_64BIT case 8: *(__u64 *)res = *(volatile __u64 *)p; break; -#endif default: barrier(); __builtin_memcpy((void *)res, (const void *)p, size); - data_access_exceeds_word_size(); barrier(); } } @@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s case 1: *(volatile __u8 *)p = *(__u8 *)res; break; case 2: *(volatile __u16 *)p = *(__u16 *)res; break; case 4: *(volatile __u32 *)p = *(__u32 *)res; break; -#ifdef CONFIG_64BIT case 8: *(volatile __u64 *)p = *(__u64 *)res; break; -#endif default: barrier(); __builtin_memcpy((void *)p, (const void *)res, size); - data_access_exceeds_word_size(); barrier(); } } -- cgit v1.2.3 From 876e78818def2983be55878b21f7152fbaebbd36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 10:09:06 +0100 Subject: time: Rename timekeeper::tkr to timekeeper::tkr_mono In preparation of adding another tkr field, rename this one to tkr_mono. Also rename tk_read_base::base_mono to tk_read_base::base, since the structure is not specific to CLOCK_MONOTONIC and the mono name got added to the tk_read_base instance. Lots of trivial churn. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.344679419@infradead.org Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 05af9a334893..73df17f1535f 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -16,16 +16,16 @@ * @read: Read function of @clock * @mask: Bitmask for two's complement subtraction of non 64bit clocks * @cycle_last: @clock cycle value at last update - * @mult: NTP adjusted multiplier for scaled math conversion + * @mult: (NTP adjusted) multiplier for scaled math conversion * @shift: Shift value for scaled math conversion * @xtime_nsec: Shifted (fractional) nano seconds offset for readout - * @base_mono: ktime_t (nanoseconds) base time for readout + * @base: ktime_t (nanoseconds) base time for readout * * This struct has size 56 byte on 64 bit. Together with a seqcount it * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used - * for a fast NMI safe accessor to clock monotonic. + * for a fast NMI safe accessors. */ struct tk_read_base { struct clocksource *clock; @@ -35,12 +35,12 @@ struct tk_read_base { u32 mult; u32 shift; u64 xtime_nsec; - ktime_t base_mono; + ktime_t base; }; /** * struct timekeeper - Structure holding internal timekeeping values. - * @tkr: The readout base structure + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC * @xtime_sec: Current CLOCK_REALTIME time in seconds * @ktime_sec: Current CLOCK_MONOTONIC time in seconds * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset @@ -76,7 +76,7 @@ struct tk_read_base { * used instead. */ struct timekeeper { - struct tk_read_base tkr; + struct tk_read_base tkr_mono; u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; -- cgit v1.2.3 From 4a4ad80d32cea69ee93bd4589f24dc478804cd80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:28:44 +0100 Subject: time: Add timerkeeper::tkr_raw Introduce tkr_raw and make use of it. base_raw -> tkr_raw.base clock->{mult,shift} -> tkr_raw.{mult.shift} Kill timekeeping_get_ns_raw() in favour of timekeeping_get_ns(&tkr_raw), this removes all mono_raw special casing. Duplicate the updates to tkr_mono.cycle_last into tkr_raw.cycle_last, both need the same value. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.422589590@infradead.org Signed-off-by: Ingo Molnar --- include/linux/timekeeper_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 73df17f1535f..fb86963859c7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -41,6 +41,7 @@ struct tk_read_base { /** * struct timekeeper - Structure holding internal timekeeping values. * @tkr_mono: The readout base structure for CLOCK_MONOTONIC + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @xtime_sec: Current CLOCK_REALTIME time in seconds * @ktime_sec: Current CLOCK_MONOTONIC time in seconds * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset @@ -48,7 +49,6 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds - * @base_raw: Monotonic raw base time in ktime_t format * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -77,6 +77,7 @@ struct tk_read_base { */ struct timekeeper { struct tk_read_base tkr_mono; + struct tk_read_base tkr_raw; u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; @@ -84,7 +85,6 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; - ktime_t base_raw; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ -- cgit v1.2.3 From f09cb9a1808e35ad7502ea39b6bfb443c7fa0f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:39:08 +0100 Subject: time: Introduce tk_fast_raw Add the NMI safe CLOCK_MONOTONIC_RAW accessor.. Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.562746929@infradead.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3eaae4754275..f36b1edf3f73 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -220,6 +220,7 @@ static inline u64 ktime_get_raw_ns(void) } extern u64 ktime_get_mono_fast_ns(void); +extern u64 ktime_get_raw_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones -- cgit v1.2.3 From fe5fba05b46c791c95a9f34228ac495f81f72fc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Mar 2015 12:39:10 +0100 Subject: time: Add ktime_get_tai_ns() Because it was the only clock for which we didn't have a _ns() accessor yet. Signed-off-by: Peter Zijlstra (Intel) Cc: John Stultz Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index f36b1edf3f73..5047b83483d6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -214,6 +214,11 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +static inline u64 ktime_get_tai_ns(void) +{ + return ktime_to_ns(ktime_get_clocktai()); +} + static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); -- cgit v1.2.3 From fe87414f71d0035756cf91a80ac256557d16b488 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 12 Mar 2014 18:33:45 +0200 Subject: ARM: OMAP2+: PRCM: split PRCM module init to their own driver files Splits the clock related provider module inits under their own driver files. Previously this was done for all modules under the common PRM driver. Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 19895a3f48b7..79b76e13d904 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -215,15 +215,14 @@ struct ti_dt_clk { .node_name = name, \ } -/* Maximum number of clock memmaps */ -#define CLK_MAX_MEMMAPS 4 - /* Static memmap indices */ enum { TI_CLKM_CM = 0, TI_CLKM_CM2, TI_CLKM_PRM, TI_CLKM_SCRM, + TI_CLKM_CTRL, + CLK_MAX_MEMMAPS }; typedef void (*ti_of_clk_init_cb_t)(struct clk_hw *, struct device_node *); -- cgit v1.2.3 From 34f439278cef7b1177f8ce24f9fc81dfc6221d3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Feb 2015 14:05:38 +0100 Subject: perf: Add per event clockid support While thinking on the whole clock discussion it occurred to me we have two distinct uses of time: 1) the tracking of event/ctx/cgroup enabled/running/stopped times which includes the self-monitoring support in struct perf_event_mmap_page. 2) the actual timestamps visible in the data records. And we've been conflating them. The first is all about tracking time deltas, nobody should really care in what time base that happens, its all relative information, as long as its internally consistent it works. The second however is what people are worried about when having to merge their data with external sources. And here we have the discussion on MONOTONIC vs MONOTONIC_RAW etc.. Where MONOTONIC is good for correlating between machines (static offset), MONOTNIC_RAW is required for correlating against a fixed rate hardware clock. This means configurability; now 1) makes that hard because it needs to be internally consistent across groups of unrelated events; which is why we had to have a global perf_clock(). However, for 2) it doesn't really matter, perf itself doesn't care what it writes into the buffer. The below patch makes the distinction between these two cases by adding perf_event_clock() which is used for the second case. It further makes this configurable on a per-event basis, but adds a few sanity checks such that we cannot combine events with different clocks in confusing ways. And since we then have per-event configurability we might as well retain the 'legacy' behaviour as a default. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b16eac5f54ce..401554074de9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -173,6 +173,7 @@ struct perf_event; * pmu::capabilities flags */ #define PERF_PMU_CAP_NO_INTERRUPT 0x01 +#define PERF_PMU_CAP_NO_NMI 0x02 /** * struct pmu - generic performance monitoring unit @@ -457,6 +458,7 @@ struct perf_event { struct pid_namespace *ns; u64 id; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; -- cgit v1.2.3 From 554ef3876c6acdff1331feab10275e9e9e0adb84 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:32 +0530 Subject: clockevents: Handle tick device's resume separately Upcoming patch will redefine possible states of a clockevent device. The RESUME mode is a special case only for tick's clockevent devices. In future it can be replaced by ->resume() callback already available for clockevent devices. Lets handle it separately so that clockevents_set_mode() only handles states valid across all devices. This also renames set_mode_resume() to tick_resume() to make it more explicit. Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c1b0112410870f49e7bf06958e1483eac6c15e20.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 59af26b54d15..a41749543d48 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -87,7 +87,7 @@ enum clock_event_mode { * @set_mode_periodic: switch mode to periodic, if !set_mode * @set_mode_oneshot: switch mode to oneshot, if !set_mode * @set_mode_shutdown: switch mode to shutdown, if !set_mode - * @set_mode_resume: resume clkevt device, if !set_mode + * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -125,7 +125,7 @@ struct clock_event_device { int (*set_mode_periodic)(struct clock_event_device *); int (*set_mode_oneshot)(struct clock_event_device *); int (*set_mode_shutdown)(struct clock_event_device *); - int (*set_mode_resume)(struct clock_event_device *); + int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); void (*suspend)(struct clock_event_device *); -- cgit v1.2.3 From 77e32c89a7117614ab3d66d20c1088de721abfaa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:33 +0530 Subject: clockevents: Manage device's state separately for the core 'enum clock_event_mode' is used for two purposes today: - to pass mode to the driver of clockevent device::set_mode(). - for managing state of the device for clockevents core. For supporting new modes/states we have moved away from the legacy set_mode() callback to new per-mode/state callbacks. New modes/states shouldn't be exposed to the legacy (now OBSOLOTE) callbacks and so we shouldn't add new states to 'enum clock_event_mode'. Lets have separate enums for the two use cases mentioned above. Keep using the earlier enum for legacy set_mode() callback and mark it OBSOLETE. And add another enum to clearly specify the possible states of a clockevent device. This also renames the newly added per-mode callbacks to reflect state changes. We haven't got rid of 'mode' member of 'struct clock_event_device' as it is used by some of the clockevent drivers and it would automatically die down once we migrate those drivers to the new interface. It ('mode') is only updated now for the drivers using the legacy interface. Suggested-by: Peter Zijlstra Suggested-by: Ingo Molnar Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/b6b0143a8a57bd58352ad35e08c25424c879c0cb.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index a41749543d48..e20232c3320a 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -32,15 +32,31 @@ enum clock_event_nofitiers { struct clock_event_device; struct module; -/* Clock event mode commands */ +/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ enum clock_event_mode { CLOCK_EVT_MODE_UNUSED = 0, CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, CLOCK_EVT_MODE_RESUME, +}; - /* Legacy ->set_mode() callback doesn't support below modes */ +/* + * Possible states of a clock event device. + * + * DETACHED: Device is not used by clockevents core. Initial state or can be + * reached from SHUTDOWN. + * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. + * PERIODIC: Device is programmed to generate events periodically. Can be + * reached from DETACHED or SHUTDOWN. + * ONESHOT: Device is programmed to generate event only once. Can be reached + * from DETACHED or SHUTDOWN. + */ +enum clock_event_state { + CLOCK_EVT_STATE_DETACHED = 0, + CLOCK_EVT_STATE_SHUTDOWN, + CLOCK_EVT_STATE_PERIODIC, + CLOCK_EVT_STATE_ONESHOT, }; /* @@ -80,13 +96,14 @@ enum clock_event_mode { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode assigned by the management code + * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE + * @state: current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. - * @set_mode_periodic: switch mode to periodic, if !set_mode - * @set_mode_oneshot: switch mode to oneshot, if !set_mode - * @set_mode_shutdown: switch mode to shutdown, if !set_mode + * @set_state_periodic: switch state to periodic, if !set_mode + * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_shutdown: switch state to shutdown, if !set_mode * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration @@ -111,20 +128,21 @@ struct clock_event_device { u32 mult; u32 shift; enum clock_event_mode mode; + enum clock_event_state state; unsigned int features; unsigned long retries; /* - * Mode transition callback(s): Only one of the two groups should be + * State transition callback(s): Only one of the two groups should be * defined: * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_mode_{shutdown|periodic|oneshot|resume}(). + * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); - int (*set_mode_periodic)(struct clock_event_device *); - int (*set_mode_oneshot)(struct clock_event_device *); - int (*set_mode_shutdown)(struct clock_event_device *); + int (*set_state_periodic)(struct clock_event_device *); + int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); void (*broadcast)(const struct cpumask *mask); @@ -177,8 +195,8 @@ extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); -extern void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); -- cgit v1.2.3 From 40433267331bc6b9d70d5cdd14bfa2c8e3e5f0ec Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 26 Mar 2015 08:43:37 +1100 Subject: mmc: core: Remove the ->enable|disable() callbacks These callbacks have been set to deprecated for some time. The last user (omap_hsmmc) has moved away from using them, which thus enables us to completely remove them. Signed-off-by: NeilBrown Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0c8cbe5d1550..b5bedaec6223 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -79,12 +79,6 @@ struct mmc_ios { }; struct mmc_host_ops { - /* - * 'enable' is called when the host is claimed and 'disable' is called - * when the host is released. 'enable' and 'disable' are deprecated. - */ - int (*enable)(struct mmc_host *host); - int (*disable)(struct mmc_host *host); /* * It is optional for the host to implement pre_req and post_req in * order to support double buffering of requests (prepare one -- cgit v1.2.3 From b7a5646fa5d5d319b2b1a3db07f615e40b184205 Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 20 Mar 2015 15:53:54 +0100 Subject: ARM: OMAP2: HSMMC: explicit fields to declare cover/card detect pin board-rx51 has no card detect pin in the mmc slot, but can detect that the (cell-phone) cover has been removed and the card is accessible. The semantics between cover/card detect differ, the gpio on the slot informs you after the card has been removed, cover removal does not necessarily mean that the card has been removed. This means different code paths are necessary. To complete this we also want different fields in the platform data for cover and card detect. This separation is not pushed all the way down into struct omap2_hsmmc_info which is used to initialize the platform data. If we did that we had to go over all board files and set the new gpio_cod pin to -EINVAL. If we forget one board or some out-of-tree archicture forgets that the default '0' is used which is a valid pin number. Signed-off-by: Andreas Fenkart Acked-by: Tony Lindgren Signed-off-by: Ulf Hansson --- include/linux/platform_data/hsmmc-omap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index 67bbcf0785f6..8e981be2e2c2 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data { u32 caps; /* Used for the MMC driver on 2430 and later */ u32 pm_caps; /* PM capabilities of the mmc */ - /* switch pin can be for card detect (default) or card cover */ - unsigned cover:1; - /* use the internal clock */ unsigned internal_clock:1; @@ -73,7 +70,8 @@ struct omap_hsmmc_platform_data { #define HSMMC_HAS_HSPE_SUPPORT (1 << 2) unsigned features; - int switch_pin; /* gpio (card detect) */ + int gpio_cd; /* gpio (card detect) */ + int gpio_cod; /* gpio (cover detect) */ int gpio_wp; /* gpio (write protect) */ int (*set_power)(struct device *dev, int power_on, int vdd); -- cgit v1.2.3 From 772742a6c7ea4612fe043353531e6435ed33e719 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 18 Mar 2015 12:24:40 +0000 Subject: arm-cci: Get rid of secure transactions for PMU driver Avoid secure transactions while probing the CCI PMU. The existing code makes use of the Peripheral ID2 (PID2) register to determine the revision of the CCI400, which requires a secure transaction. This puts a limitation on the usage of the driver on systems running non-secure Linux(e.g, ARM64). Updated the device-tree binding for cci pmu node to add the explicit revision number for the compatible field. The supported strings are : arm,cci-400-pmu,r0 arm,cci-400-pmu,r1 arm,cci-400-pmu - DEPRECATED. See NOTE below NOTE: If the revision is not mentioned, we need to probe the cci revision, which could be fatal on a platform running non-secure. We need a reliable way to know if we can poke the CCI registers at runtime on ARM32. We depend on 'mcpm_is_available()' when it is available. mcpm_is_available() returns true only when there is a registered driver for mcpm. Otherwise, we assume that we don't have secure access, and skips probing the revision number(ARM64 case). The MCPM should figure out if it is safe to access the CCI. Unfortunately there isn't a reliable way to indicate the same via dtb. This patch doesn't address/change the current situation. It only deals with the CCI-PMU, leaving the assumptions about the secure access as it has been, prior to this patch. Cc: devicetree@vger.kernel.org Cc: Punit Agrawal Tested-by: Sudeep Holla Acked-by: Nicolas Pitre Acked-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Signed-off-by: Will Deacon --- include/linux/arm-cci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-cci.h b/include/linux/arm-cci.h index 79d6edf446d5..aede5c765eec 100644 --- a/include/linux/arm-cci.h +++ b/include/linux/arm-cci.h @@ -24,6 +24,8 @@ #include #include +#include + struct device_node; #ifdef CONFIG_ARM_CCI -- cgit v1.2.3 From ee8e5d5fbec0e880b18bbdbfe12de53ab1dec21f Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 18 Mar 2015 12:24:41 +0000 Subject: arm-cci: Split the code for PMU vs driver support This patch separates the PMU driver code from the low level CCI driver code and enables the PMU driver for ARM64. Introduces config options for both. ARM_CCI400_PORT_CTRL - controls the low level driver code for CCI400 ports. ARM_CCI400_PMU - controls the PMU driver code ARM_CCI400_COMMON - Common defintions for CCI400 This patch also changes: ARM_CCI - common code for probing the CCI devices. This can be used for adding support for newer CCI versions(e.g, CCI-500). Cc: Bartlomiej Zolnierkiewicz Cc: Kukjin Kim Cc: Abhilash Kesavan Cc: Liviu Dudau Cc: Lorenzo Pieralisi Cc: Sudeep Holla Cc: Nicolas Pitre Cc: Punit Agrawal Acked-by: Sudeep Holla Acked-by: Nicolas Pitre Acked-by: Punit Agrawal Signed-off-by: Suzuki K. Poulose Signed-off-by: Will Deacon --- include/linux/arm-cci.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arm-cci.h b/include/linux/arm-cci.h index aede5c765eec..521ec1f2e6bc 100644 --- a/include/linux/arm-cci.h +++ b/include/linux/arm-cci.h @@ -30,12 +30,16 @@ struct device_node; #ifdef CONFIG_ARM_CCI extern bool cci_probed(void); +#else +static inline bool cci_probed(void) { return false; } +#endif + +#ifdef CONFIG_ARM_CCI400_PORT_CTRL extern int cci_ace_get_port(struct device_node *dn); extern int cci_disable_port_by_cpu(u64 mpidr); extern int __cci_control_port_by_device(struct device_node *dn, bool enable); extern int __cci_control_port_by_index(u32 port, bool enable); #else -static inline bool cci_probed(void) { return false; } static inline int cci_ace_get_port(struct device_node *dn) { return -ENODEV; @@ -51,6 +55,7 @@ static inline int __cci_control_port_by_index(u32 port, bool enable) return -ENODEV; } #endif + #define cci_disable_port_by_device(dev) \ __cci_control_port_by_device(dev, false) #define cci_enable_port_by_device(dev) \ -- cgit v1.2.3 From 5b77d162a3d7359a8a8d83776720da065bf4e77b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 23 Mar 2015 09:26:36 +0100 Subject: i2c: slave: rework the slave API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After more discussion, brave users, and additional datasheet evaluation, some API updates for the new I2C slave framework became imminent. The slave events now get some easier to understand naming. Also, the event handling has been simplified to only need a single call to the slave callback when an action by the backend is required. Reported-by: Uwe Kleine-König Signed-off-by: Wolfram Sang Acked-by: Geert Uytterhoeven Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 243d1a1d78b2..c5e4bb2c5759 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -253,10 +253,10 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) #if IS_ENABLED(CONFIG_I2C_SLAVE) enum i2c_slave_event { - I2C_SLAVE_REQ_READ_START, - I2C_SLAVE_REQ_READ_END, - I2C_SLAVE_REQ_WRITE_START, - I2C_SLAVE_REQ_WRITE_END, + I2C_SLAVE_READ_REQUESTED, + I2C_SLAVE_WRITE_REQUESTED, + I2C_SLAVE_READ_PROCESSED, + I2C_SLAVE_WRITE_RECEIVED, I2C_SLAVE_STOP, }; -- cgit v1.2.3 From 9faa643855df6f673f08543576c759b82dd270d3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 27 Mar 2015 16:46:30 +0100 Subject: libata: use READ_LOG_DMA_EXT If READ_LOG_DMA_EXT is supported we should try to use it for reading the log pages. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 1648026e06b4..681520f8a3de 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -696,6 +696,13 @@ static inline bool ata_id_wcache_enabled(const u16 *id) return id[ATA_ID_CFS_ENABLE_1] & (1 << 5); } +static inline bool ata_id_has_read_log_dma_ext(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_3] & (1 << 3); +} + /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3 From 27f00e53af72702d3a9e6f75590e66fe04914149 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 27 Mar 2015 16:46:33 +0100 Subject: ide,ata: Rename ATA_IDX to ATA_SENSE ATA-8 defines bit 1 as 'ATA_SENSE', not 'ATA_IDX'. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 681520f8a3de..e3cb41c17ba7 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -177,7 +177,7 @@ enum { ATA_DSC = (1 << 4), /* drive seek complete */ ATA_DRQ = (1 << 3), /* data request i/o */ ATA_CORR = (1 << 2), /* corrected data error */ - ATA_IDX = (1 << 1), /* index */ + ATA_SENSE = (1 << 1), /* sense code available */ ATA_ERR = (1 << 0), /* have an error */ ATA_SRST = (1 << 2), /* software reset */ ATA_ICRC = (1 << 7), /* interface CRC error */ -- cgit v1.2.3 From 42b966fbf35da9c87f08d98f9b8978edf9e717cf Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 27 Mar 2015 16:46:35 +0100 Subject: libata: Implement NCQ autosense Some newer devices support NCQ autosense (cf ACS-4), so we should be using it to retrieve the sense code and speed up recovery. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index e3cb41c17ba7..0c65526e9295 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -525,6 +525,8 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) +#define ata_id_has_ncq_autosense(id) \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3 From fe7173c206de63fc28475ee6ae42ff95c05692de Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 27 Mar 2015 16:46:36 +0100 Subject: libata: Implement support for sense data reporting ACS-4 defines a sense data reporting feature set. This patch implements support for it. Signed-off-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 0c65526e9295..b666b773e111 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -94,6 +94,8 @@ enum { ATA_ID_SECTOR_SIZE = 106, ATA_ID_WWN = 108, ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ + ATA_ID_COMMAND_SET_3 = 119, + ATA_ID_COMMAND_SET_4 = 120, ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, @@ -382,6 +384,8 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ + SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ + /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -705,6 +709,20 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_3] & (1 << 3); } +static inline bool ata_id_has_sense_reporting(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_3] & (1 << 6); +} + +static inline bool ata_id_sense_reporting_enabled(const u16 *id) +{ + if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) + return false; + return id[ATA_ID_COMMAND_SET_4] & (1 << 6); +} + /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3 From 27842bb18b004a2802f4b3221c79ce638c4bf6ee Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 13 Mar 2015 11:09:39 -0700 Subject: mmc: Remove msm_sdcc driver This driver is orphaned now that mach-msm has been removed. Delete it. Cc: Chris Ball Cc: David Brown Cc: Bryan Huntsman Cc: Daniel Walker Signed-off-by: Stephen Boyd Acked-by: Ulf Hansson Signed-off-by: Kumar Gala --- include/linux/platform_data/mmc-msm_sdcc.h | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 include/linux/platform_data/mmc-msm_sdcc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-msm_sdcc.h b/include/linux/platform_data/mmc-msm_sdcc.h deleted file mode 100644 index 55aa873c9396..000000000000 --- a/include/linux/platform_data/mmc-msm_sdcc.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __MMC_MSM_SDCC_H -#define __MMC_MSM_SDCC_H - -#include -#include -#include - -struct msm_mmc_gpio { - unsigned no; - const char *name; -}; - -struct msm_mmc_gpio_data { - struct msm_mmc_gpio *gpio; - u8 size; -}; - -struct msm_mmc_platform_data { - unsigned int ocr_mask; /* available voltages */ - u32 (*translate_vdd)(struct device *, unsigned int); - unsigned int (*status)(struct device *); - int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id); - struct msm_mmc_gpio_data *gpio_data; - void (*init_card)(struct mmc_card *card); -}; - -#endif -- cgit v1.2.3 From 4e59080397faadee59d39ffa2116dc8607adc9c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Mar 2015 14:01:25 -0400 Subject: NFSv4.1: Allow getdeviceinfo to return notification info back to caller We are only allowed to cache deviceinfo if the server supports notifications and actually promises to call us back when changes occur. Right now, we request those notifications, but then we don't check the server's reply. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4cb3eaa89cf7..3d88908fd140 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -255,11 +255,13 @@ struct nfs4_layoutget { struct nfs4_getdeviceinfo_args { struct nfs4_sequence_args seq_args; struct pnfs_device *pdev; + __u32 notify_types; }; struct nfs4_getdeviceinfo_res { struct nfs4_sequence_res seq_res; struct pnfs_device *pdev; + __u32 notification; }; struct nfs4_layoutcommit_args { -- cgit v1.2.3 From 9e1681c2e74bdd84bad6f74b47a4d88ad2f433df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Mar 2015 17:23:31 -0400 Subject: NFSv4: Truncating file opens should also sync O_DIRECT writes We don't just want to sync out buffered writes, but also O_DIRECT ones. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdc..b638eb6727c6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -512,6 +512,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned * Try to write back everything synchronously (but check the * return value!) */ +extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From 8cc7f33aadc8fb37b5a3f4c46f5fa83748a92a01 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 13 Mar 2015 00:38:39 -0700 Subject: mtd: spi-nor: factor out replace-able flash_{lock,unlock} Flash lock/unlock is a flash-specific operations. Factor out a callback for it to more readily support other vendors. Signed-off-by: Brian Norris Tested-by: VIET NGA DAO --- include/linux/mtd/spi-nor.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 4720b86ee73d..e5409524bb0a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -155,6 +155,8 @@ enum spi_nor_option_flags { * @write: [DRIVER-SPECIFIC] write data to the SPI NOR * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR * at the offset @offs + * @lock: [FLASH-SPECIFIC] lock a region of the SPI NOR + * @unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @priv: the private data */ struct spi_nor { @@ -189,6 +191,9 @@ struct spi_nor { size_t len, size_t *retlen, const u_char *write_buf); int (*erase)(struct spi_nor *nor, loff_t offs); + int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len); + int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); + void *priv; }; -- cgit v1.2.3 From 0384e8c6c6fa49dae44ffad31958e9b897da0160 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Wed, 21 Jan 2015 19:05:57 +0200 Subject: of: Empty node & property flag accessors when !OF Introduce empty node and property flag accessors when CONFIG_OF is not defined. This allows us to use them without ifdef'ing them in places where it makes sense to do so. Signed-off-by: Pantelis Antoniou Acked-by: Rob Herring Signed-off-by: Grant Likely --- include/linux/of.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index dfde07e77a63..7ede4496bad6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -616,6 +616,38 @@ static inline const char *of_prop_next_string(struct property *prop, return NULL; } +static inline int of_node_check_flag(struct device_node *n, unsigned long flag) +{ + return 0; +} + +static inline int of_node_test_and_set_flag(struct device_node *n, + unsigned long flag) +{ + return 0; +} + +static inline void of_node_set_flag(struct device_node *n, unsigned long flag) +{ +} + +static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) +{ +} + +static inline int of_property_check_flag(struct property *p, unsigned long flag) +{ + return 0; +} + +static inline void of_property_set_flag(struct property *p, unsigned long flag) +{ +} + +static inline void of_property_clear_flag(struct property *p, unsigned long flag) +{ +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ -- cgit v1.2.3 From a878a1a61a1f0e4b23602ddd87b1408a7a748d0e Mon Sep 17 00:00:00 2001 From: Antonio Fiol Date: Sat, 28 Mar 2015 09:07:14 +0100 Subject: iio: max517: Add support for MAX520 and MAX521 chips. MAX520 and MAX521 are protocol-compatible with the already supported chips, just have more channels. Signed-off-by: Antonio Fiol Signed-off-by: Jonathan Cameron --- include/linux/iio/dac/max517.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/dac/max517.h b/include/linux/iio/dac/max517.h index f6d1d252f08d..7668716cd73c 100644 --- a/include/linux/iio/dac/max517.h +++ b/include/linux/iio/dac/max517.h @@ -9,7 +9,7 @@ #define IIO_DAC_MAX517_H_ struct max517_platform_data { - u16 vref_mv[2]; + u16 vref_mv[8]; }; #endif /* IIO_DAC_MAX517_H_ */ -- cgit v1.2.3 From 37d3455672732b29a477732a94abfe95e199f0ce Mon Sep 17 00:00:00 2001 From: Josselin Costanzi Date: Sun, 22 Mar 2015 20:33:38 +0200 Subject: iio: add watermark logic to iio read and poll Currently the IIO buffer blocking read only wait until at least one data element is available. This patch makes the reader sleep until enough data is collected before returning to userspace. This should limit the read() calls count when trying to get data in batches. Co-author: Yannick Bedhomme Signed-off-by: Josselin Costanzi [rebased and remove buffer timeout] Signed-off-by: Octavian Purdila Reviewed-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index b65850a41127..eb8622b78ec9 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -21,8 +21,8 @@ struct iio_buffer; * struct iio_buffer_access_funcs - access functions for buffers. * @store_to: actually store stuff to the buffer * @read_first_n: try to get a specified number of bytes (must exist) - * @data_available: indicates whether data for reading from the buffer is - * available. + * @data_available: indicates how much data is available for reading from + * the buffer. * @request_update: if a parameter change has been marked, update underlying * storage. * @set_bytes_per_datum:set number of bytes per datum @@ -43,7 +43,7 @@ struct iio_buffer_access_funcs { int (*read_first_n)(struct iio_buffer *buffer, size_t n, char __user *buf); - bool (*data_available)(struct iio_buffer *buffer); + size_t (*data_available)(struct iio_buffer *buffer); int (*request_update)(struct iio_buffer *buffer); @@ -72,6 +72,7 @@ struct iio_buffer_access_funcs { * @demux_bounce: [INTERN] buffer for doing gather from incoming scan. * @buffer_list: [INTERN] entry in the devices list of current buffers. * @ref: [INTERN] reference count of the buffer. + * @watermark: [INTERN] number of datums to wait for poll/read. */ struct iio_buffer { int length; @@ -90,6 +91,7 @@ struct iio_buffer { void *demux_bounce; struct list_head buffer_list; struct kref ref; + unsigned int watermark; }; /** -- cgit v1.2.3 From f4f4673b7535eff4ee1a8cfb1685fa1e1a0cb79d Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 22 Mar 2015 20:33:39 +0200 Subject: iio: add support for hardware fifo Some devices have hardware buffers that can store a number of samples for later consumption. Hardware usually provides interrupts to notify the processor when the FIFO is full or when it has reached a certain watermark level. This helps with reducing the number of interrupts to the host processor and thus it helps decreasing the power consumption. This patch enables usage of hardware FIFOs for IIO devices in conjunction with software device buffers. When the hardware FIFO is enabled the samples are stored in the hardware FIFO. The samples are later flushed to the device software buffer when the number of entries in the hardware FIFO reaches the hardware watermark or when a flush operation is triggered by the user when doing a non-blocking read on an empty software device buffer. In order to implement hardware FIFO support the device drivers must implement the following new operations: setting and getting the hardware FIFO watermark level, flushing the hardware FIFO to the software device buffer. The device must also expose information about the hardware FIFO such it's minimum and maximum watermark and if necessary a list of supported watermark values. Finally, the device driver must activate the hardware FIFO when the device buffer is enabled, if the current device settings allows it. The software device buffer watermark is passed by the IIO core to the device driver as a hint for the hardware FIFO watermark. The device driver can adjust this value to allow for hardware limitations (such as capping it to the maximum hardware watermark or adjust it to a value that is supported by the hardware). It can also disable the hardware watermark (and implicitly the hardware FIFO) it this value is below the minimum hardware watermark. Since a driver may support hardware FIFO only when not in triggered buffer mode (due to different semantics of hardware FIFO sampling and triggered sampling) this patch changes the IIO core code to allow falling back to non-triggered buffered mode if no trigger is enabled. Signed-off-by: Octavian Purdila Reviewed-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 80d855061064..d86b753e9b30 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -338,6 +338,16 @@ struct iio_dev; * provide a custom of_xlate function that reads the * *args* and returns the appropriate index in registered * IIO channels array. + * @hwfifo_set_watermark: function pointer to set the current hardware + * fifo watermark level; see hwfifo_* entries in + * Documentation/ABI/testing/sysfs-bus-iio for details on + * how the hardware fifo operates + * @hwfifo_flush_to_buffer: function pointer to flush the samples stored + * in the hardware fifo to the device buffer. The driver + * should not flush more than count samples. The function + * must return the number of samples flushed, 0 if no + * samples were flushed or a negative integer if no samples + * were flushed and there was an error. **/ struct iio_info { struct module *driver_module; @@ -399,6 +409,9 @@ struct iio_info { unsigned *readval); int (*of_xlate)(struct iio_dev *indio_dev, const struct of_phandle_args *iiospec); + int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); + int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, + unsigned count); }; /** -- cgit v1.2.3 From fa6ed4cb6fd44ef4c6fad4d9572119d22381f32c Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 23 Mar 2015 12:32:01 +0000 Subject: irqchip: mips-gic: Add new functions to start/stop the GIC counter We add new functions to start and stop the GIC counter since there are no guarantees the counter will be running after a CPU reset. The GIC counter is stopped by setting the 29th bit on the GIC Config register and it is started by clearing that bit. Cc: Thomas Gleixner Cc: Jason Cooper Cc: Andrew Bresticker Cc: Qais Yousef Cc: Signed-off-by: Markos Chandras Link: https://lkml.kernel.org/r/1427113923-9840-2-git-send-email-markos.chandras@imgtec.com Signed-off-by: Jason Cooper --- include/linux/irqchip/mips-gic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index e6a6aac451db..3ea2e4754c40 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -240,6 +240,8 @@ extern unsigned int gic_get_count_width(void); extern cycle_t gic_read_compare(void); extern void gic_write_compare(cycle_t cnt); extern void gic_write_cpu_compare(cycle_t cnt, int cpu); +extern void gic_start_count(void); +extern void gic_stop_count(void); extern void gic_send_ipi(unsigned int intr); extern unsigned int plat_ipi_call_int_xlate(unsigned int); extern unsigned int plat_ipi_resched_int_xlate(unsigned int); -- cgit v1.2.3 From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 26 Mar 2015 19:53:57 -0700 Subject: tc: bpf: generalize pedit action existing TC action 'pedit' can munge any bits of the packet. Generalize it for use in bpf programs attached as cls_bpf and act_bpf via bpf_skb_store_bytes() helper function. Signed-off-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 280a315de8d6..d5cda067115a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -59,6 +59,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ }; -- cgit v1.2.3 From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:11 +0900 Subject: vlan: Introduce helper functions to check if skb is tagged Separate the two checks for single vlan and multiple vlans in netif_skb_features(). This allows us to move the check for multiple vlans to another function later. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b11b28a30b9e..4265d440ec4d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -561,4 +561,49 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } +/** + * skb_vlan_tagged - check if skb is vlan tagged. + * @skb: skbuff to query + * + * Returns true if the skb is tagged, regardless of whether it is hardware + * accelerated or not. + */ +static inline bool skb_vlan_tagged(const struct sk_buff *skb) +{ + if (!skb_vlan_tag_present(skb) && + likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) + return false; + + return true; +} + +/** + * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. + * @skb: skbuff to query + * + * Returns true if the skb is tagged with multiple vlan headers, regardless + * of whether it is hardware accelerated or not. + */ +static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + if (!skb_vlan_tag_present(skb)) { + struct vlan_ethhdr *veh; + + if (likely(protocol != htons(ETH_P_8021Q) && + protocol != htons(ETH_P_8021AD))) + return false; + + veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + return false; + + return true; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ -- cgit v1.2.3 From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:12 +0900 Subject: net: Move check for multiple vlans to drivers To allow drivers to handle the features check for multiple tags, move the check to ndo_features_check(). As no drivers currently handle multiple tagged TSO, introduce dflt_features_check() and call it if the driver does not have ndo_features_check(). Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4265d440ec4d..920e4457ce6e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -606,4 +606,26 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) return true; } +/** + * vlan_features_check - drop unsafe features for skb with multiple tags. + * @skb: skbuff to query + * @features: features to be checked + * + * Returns features without unsafe ones if the skb has multiple tags. + */ +static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tagged_multi(skb)) + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + return features; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ -- cgit v1.2.3 From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:13 +0900 Subject: net: Introduce passthru_features_check As there are a number of (especially virtual) devices that don't need the multiple vlan check, introduce passthru_features_check() for convenience. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08c4ab37189f..967bb4c8caf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) -- cgit v1.2.3 From eb2d90c058280b41c8493cb17271e053b5ebba39 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 26 Mar 2015 11:35:59 +0100 Subject: mfd: max77693: Remove unused structures The max77693 regulator driver no longer supports board files. Remove the left-overs. Additionally fix name of device in comment. Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/max77693.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h index f0b6585cd874..09a4dedaeea8 100644 --- a/include/linux/mfd/max77693.h +++ b/include/linux/mfd/max77693.h @@ -30,7 +30,7 @@ #ifndef __LINUX_MFD_MAX77693_H #define __LINUX_MFD_MAX77693_H -/* MAX77686 regulator IDs */ +/* MAX77693 regulator IDs */ enum max77693_regulators { MAX77693_ESAFEOUT1 = 0, MAX77693_ESAFEOUT2, @@ -38,12 +38,6 @@ enum max77693_regulators { MAX77693_REG_MAX, }; -struct max77693_regulator_data { - int id; - struct regulator_init_data *initdata; - struct device_node *of_node; -}; - struct max77693_reg_data { u8 addr; u8 data; @@ -103,10 +97,6 @@ struct max77693_led_platform_data { /* MAX77693 */ struct max77693_platform_data { - /* regulator data */ - struct max77693_regulator_data *regulators; - int num_regulators; - /* muic data */ struct max77693_muic_platform_data *muic_data; struct max77693_led_platform_data *led_data; -- cgit v1.2.3 From 6f240fbc474018872f6d9664a3bd350bd22cc8d9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 10 Mar 2015 09:27:35 +0900 Subject: PM / devfreq: event: Add const keyword for devfreq_event_ops structure This patch adds the const keyword for devfreq_event_ops structure because the ops of devfreq_event_desc structure should not be changed after initialization. Cc: Myungjoo Ham Cc: Kyungmin Park Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq-event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h index 602fbbfcfeed..0a83a1e648b0 100644 --- a/include/linux/devfreq-event.h +++ b/include/linux/devfreq-event.h @@ -91,7 +91,7 @@ struct devfreq_event_desc { const char *name; void *driver_data; - struct devfreq_event_ops *ops; + const struct devfreq_event_ops *ops; }; #if defined(CONFIG_PM_DEVFREQ_EVENT) -- cgit v1.2.3 From 6e00ff079354ee72fe95cb61a3993962074a2592 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sun, 29 Mar 2015 12:45:42 +0100 Subject: mfd: arizona: Correct type of gpio_defaults gpio_defaults needs to be specified as an unsigned int rather than an int, because the intention of the DT binding is that all out of range values for a 16-bit register will cause the defaults to be used, however, if gpio_defaults is an int then values that are larger than INT_MAX will become negative numbers and be written out directly to the hardware. As no where in the code replies on gpio_defaults being an int, the simplest fix is to just change it to unsigned. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 48fe31356605..1789cb0f4f17 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -94,7 +94,7 @@ struct arizona_pdata { int gpio_base; /** Pin state for GPIO pins */ - int gpio_defaults[ARIZONA_MAX_GPIO]; + unsigned int gpio_defaults[ARIZONA_MAX_GPIO]; /** * Maximum number of channels clocks will be generated for, -- cgit v1.2.3 From 5cdc7f02162aa8b980bd5d89e86fd94de0285ca3 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 28 Mar 2015 22:45:35 +0200 Subject: mfd: menelaus: Delete omap_has_menelaus Delete unused macro. Signed-off-by: Aaro Koskinen Signed-off-by: Lee Jones --- include/linux/mfd/menelaus.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/menelaus.h b/include/linux/mfd/menelaus.h index f097e89134cb..a1e12bf393db 100644 --- a/include/linux/mfd/menelaus.h +++ b/include/linux/mfd/menelaus.h @@ -38,10 +38,4 @@ extern int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV); extern int menelaus_set_regulator_sleep(int enable, u32 val); -#if defined(CONFIG_ARCH_OMAP2) && defined(CONFIG_MENELAUS) -#define omap_has_menelaus() 1 -#else -#define omap_has_menelaus() 0 -#endif - #endif -- cgit v1.2.3 From 1ea8684e1e0d7453f65a1ed69c2d0b890bfb9e33 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 28 Mar 2015 22:45:36 +0200 Subject: mfd: menelaus: Drop support for SW controller VCORE Drop support for SW controlled VCORE, nobody uses it. Signed-off-by: Aaro Koskinen Signed-off-by: Lee Jones --- include/linux/mfd/menelaus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/menelaus.h b/include/linux/mfd/menelaus.h index a1e12bf393db..9e85ac06da89 100644 --- a/include/linux/mfd/menelaus.h +++ b/include/linux/mfd/menelaus.h @@ -24,7 +24,6 @@ extern int menelaus_set_vaux(unsigned int mV); extern int menelaus_set_vdcdc(int dcdc, unsigned int mV); extern int menelaus_set_slot_sel(int enable); extern int menelaus_get_slot_pin_states(void); -extern int menelaus_set_vcore_sw(unsigned int mV); extern int menelaus_set_vcore_hw(unsigned int roof_mV, unsigned int floor_mV); #define EN_VPLL_SLEEP (1 << 7) -- cgit v1.2.3 From bc917be8105993c256338ad1189650364a741483 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Mar 2015 17:45:43 -0400 Subject: saner iov_iter initialization primitives iovec-backed iov_iter instances are assumed to satisfy several properties: * no more than UIO_MAXIOV elements in iovec array * total size of all ranges is no more than MAX_RW_COUNT * all ranges pass access_ok(). The problem is, invariants of data structures should be established in the primitives creating those data structures, not in the code using those primitives. And iov_iter_init() violates that principle. For a while we managed to get away with that, but once the use of iov_iter started to spread, it didn't take long for shit to hit the fan - missed check in sys_sendto() had introduced a roothole. We _do_ have primitives for importing and validating iovecs (both native and compat ones) and those primitives are almost always followed by shoving the resulting iovec into iov_iter. Life would be considerably simpler (and safer) if we combined those primitives with initializing iov_iter. That gives us two new primitives - import_iovec() and compat_import_iovec(). Calling conventions: iovec = iov_array; err = import_iovec(direction, uvec, nr_segs, ARRAY_SIZE(iov_array), &iovec, &iter); imports user vector into kernel space (into iov_array if it fits, allocated if it doesn't fit or if iovec was NULL), validates it and sets iter up to refer to it. On success 0 is returned and allocated kernel copy (or NULL if the array had fit into caller-supplied one) is returned via iovec. On failure all allocations are undone and -E... is returned. If the total size of ranges exceeds MAX_RW_COUNT, the excess is silently truncated. compat_import_iovec() expects uvec to be a pointer to user array of compat_iovec; otherwise it's identical to import_iovec(). Finally, import_single_range() sets iov_iter backed by single-element iovec covering a user-supplied range - err = import_single_range(direction, address, size, iovec, &iter); does validation and sets iter up. Again, size in excess of MAX_RW_COUNT gets silently truncated. Next commits will be switching the things up to use of those and reducing the amount of iov_iter_init() instances. Signed-off-by: Al Viro --- include/linux/uio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 71880299ed48..1f4a37f1f025 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -139,4 +139,18 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); + +#ifdef CONFIG_COMPAT +struct compat_iovec; +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i); +#endif + +int import_single_range(int type, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i); + #endif -- cgit v1.2.3 From 9647507aff4d885d98a884a634d360b8f2d24c10 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 12 Mar 2015 08:45:02 -0700 Subject: leds: flash: Fix the size of sysfs_groups array LED_FLASH_MAX_SYSFS_GROUPS macro had value that was relevant for previous version of the patches introducing LED Flash class. Currently it is required to reserve the room for maximum 4 sysfs groups. Since the last element of the struct attribute_group array passed to the function device_create_with_groups has to be NULL, the size of the array has to be greater by one than maximum allowed number of groups. Therefore, the name of the macro is being changed to LED_FLASH_SYSFS_GROUPS_SIZE, to make it more accurrate. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Bryan Wu Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/led-class-flash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 21ec91e13c47..e97966d1fb8d 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -32,7 +32,7 @@ struct led_classdev_flash; #define LED_FAULT_LED_OVER_TEMPERATURE (1 << 8) #define LED_NUM_FLASH_FAULTS 9 -#define LED_FLASH_MAX_SYSFS_GROUPS 7 +#define LED_FLASH_SYSFS_GROUPS_SIZE 5 struct led_flash_ops { /* set flash brightness */ @@ -80,7 +80,7 @@ struct led_classdev_flash { struct led_flash_setting timeout; /* LED Flash class sysfs groups */ - const struct attribute_group *sysfs_groups[LED_FLASH_MAX_SYSFS_GROUPS]; + const struct attribute_group *sysfs_groups[LED_FLASH_SYSFS_GROUPS_SIZE]; }; static inline struct led_classdev_flash *lcdev_to_flcdev( -- cgit v1.2.3 From 0e6c9122a6ec96d19f1db61e9750287d86b6829c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Mar 2015 16:23:44 +0800 Subject: PCI: Keep individual VF BAR size in struct pci_sriov Currently we don't store the individual VF BAR size. We calculate it when needed by dividing the PF's IOV resource size (which contains space for *all* the VFs) by total_VFs or by reading the BAR in the SR-IOV capability again. Keep the individual VF BAR size in struct pci_sriov.barsz[], add pci_iov_resource_size() to retrieve it, and use that instead of doing the division or reading the SR-IOV capability BAR. [bhelgaas: rename to "barsz[]", simplify barsz[] index computation, remove SR-IOV capability BAR sizing] Signed-off-by: Wei Yang Acked-by: Bjorn Helgaas Signed-off-by: Benjamin Herrenschmidt --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 211e9da8a7d7..15596582e575 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1675,6 +1675,7 @@ int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); +resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); #else static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } @@ -1686,6 +1687,8 @@ static inline int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { return 0; } static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } +static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) +{ return 0; } #endif #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -- cgit v1.2.3 From b07579c0924eee1543eb6cd2c19544d15a4b5236 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Mar 2015 16:23:48 +0800 Subject: PCI: Export pci_iov_virtfn_bus() and pci_iov_virtfn_devfn() On PowerNV, some resource reservation is needed for SR-IOV VFs that don't exist at the bootup stage. To do the match between resources and VFs, the code need to get the VF's BDF in advance. Rename virtfn_bus() and virtfn_devfn() to pci_iov_virtfn_bus() and pci_iov_virtfn_devfn() and export them. [bhelgaas: changelog, make "busnr" int] Signed-off-by: Wei Yang Acked-by: Bjorn Helgaas Signed-off-by: Benjamin Herrenschmidt --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 15596582e575..99ea94835fb6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1669,6 +1669,9 @@ int pci_ext_cfg_avail(void); void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #ifdef CONFIG_PCI_IOV +int pci_iov_virtfn_bus(struct pci_dev *dev, int id); +int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); + int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); int pci_num_vf(struct pci_dev *dev); @@ -1677,6 +1680,14 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); #else +static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) +{ + return -ENOSYS; +} +static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) +{ + return -ENOSYS; +} static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } static inline void pci_disable_sriov(struct pci_dev *dev) { } -- cgit v1.2.3 From 978d2d68312326b715a5913aaab1eaf24fe99108 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 25 Mar 2015 16:23:50 +0800 Subject: PCI: Add pcibios_iov_resource_alignment() interface Per the SR-IOV spec r1.1, sec 3.3.14, the required alignment of a PF's IOV BAR is the size of an individual VF BAR, and the size consumed is the individual VF BAR size times NumVFs. The PowerNV platform has additional alignment requirements to help support its Partitionable Endpoint device isolation feature (see Documentation/powerpc/pci_iov_resource_on_powernv.txt). Add a pcibios_iov_resource_alignment() interface to allow platforms to request additional alignment. [bhelgaas: changelog, adapt to reworked pci_sriov_resource_alignment(), drop "align" parameter] Signed-off-by: Wei Yang Acked-by: Bjorn Helgaas Signed-off-by: Benjamin Herrenschmidt --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 99ea94835fb6..4e1f17db1a81 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1174,6 +1174,7 @@ unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type); +resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); #define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0) #define PCI_VGA_STATE_CHANGE_DECODES (1 << 1) -- cgit v1.2.3 From 3bbf7f4624856751aa4cf279a472bd14a8eb16fd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 31 Mar 2015 13:25:05 +1030 Subject: linux/cpumask.h: add typechecking to cpumask_test_cpu The Subtlety (1) referred to vanished with 6ba2ef7baac2 ("cpumask: Move deprecated functions to end of header."). That used to mention some suboptimal code generation by a, by now, rather ancient gcc. With gcc 4.7, I don't see any change in the generated code by making it a static inline, so let's add type checking and get rid of the ghost reference. Signed-off-by: Rasmus Villemoes Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4ad2d3c8e21f..89558d0b56ac 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -290,11 +290,11 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpumask: the cpumask pointer * * Returns 1 if @cpu is set in @cpumask, else returns 0 - * - * No static inline type checking - see Subtlety (1) above. */ -#define cpumask_test_cpu(cpu, cpumask) \ - test_bit(cpumask_check(cpu), cpumask_bits((cpumask))) +static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +{ + return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); +} /** * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask -- cgit v1.2.3 From ea3c023ebf7130d72f9f00a3992c22176ef6703b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 27 Jan 2015 21:45:51 +0000 Subject: IRQCHIP: mips-gic: Add missing definitions for FDC IRQ Add missing VPE_PEND, VPE_RMASK and VPE_SMASK definitions for the local FDC interrupt. These local interrupt definitions aren't directly used, but if they exist they should be complete. Signed-off-by: James Hogan Cc: Andrew Bresticker Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/9127/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index e6a6aac451db..2e79b4bb2d75 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -165,6 +165,8 @@ #define GIC_VPE_PEND_SWINT0_MSK (MSK(1) << GIC_VPE_PEND_SWINT0_SHF) #define GIC_VPE_PEND_SWINT1_SHF 5 #define GIC_VPE_PEND_SWINT1_MSK (MSK(1) << GIC_VPE_PEND_SWINT1_SHF) +#define GIC_VPE_PEND_FDC_SHF 6 +#define GIC_VPE_PEND_FDC_MSK (MSK(1) << GIC_VPE_PEND_FDC_SHF) /* GIC_VPE_RMASK Masks */ #define GIC_VPE_RMASK_WD_SHF 0 @@ -179,6 +181,8 @@ #define GIC_VPE_RMASK_SWINT0_MSK (MSK(1) << GIC_VPE_RMASK_SWINT0_SHF) #define GIC_VPE_RMASK_SWINT1_SHF 5 #define GIC_VPE_RMASK_SWINT1_MSK (MSK(1) << GIC_VPE_RMASK_SWINT1_SHF) +#define GIC_VPE_RMASK_FDC_SHF 6 +#define GIC_VPE_RMASK_FDC_MSK (MSK(1) << GIC_VPE_RMASK_FDC_SHF) /* GIC_VPE_SMASK Masks */ #define GIC_VPE_SMASK_WD_SHF 0 @@ -193,6 +197,8 @@ #define GIC_VPE_SMASK_SWINT0_MSK (MSK(1) << GIC_VPE_SMASK_SWINT0_SHF) #define GIC_VPE_SMASK_SWINT1_SHF 5 #define GIC_VPE_SMASK_SWINT1_MSK (MSK(1) << GIC_VPE_SMASK_SWINT1_SHF) +#define GIC_VPE_SMASK_FDC_SHF 6 +#define GIC_VPE_SMASK_FDC_MSK (MSK(1) << GIC_VPE_SMASK_FDC_SHF) /* GIC nomenclature for Core Interrupt Pins. */ #define GIC_CPU_INT0 0 /* Core Interrupt 2 */ -- cgit v1.2.3 From 8286ae03308c6f97f346f9f8cb9174b04969add5 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 25 Mar 2015 15:39:50 +0000 Subject: MIPS: Add CDMM bus support Add MIPS Common Device Memory Map (CDMM) support in the form of a bus in the standard Linux device model. Each device attached via CDMM is discoverable via an 8-bit type identifier and may contain a number of blocks of memory mapped registers in the CDMM region. IRQs are expected to be handled separately. Due to the per-cpu (per-VPE for MT cores) nature of the CDMM devices, all the driver callbacks take place from workqueues which are run on the right CPU for the device in question, so that the driver doesn't need to be as concerned about which CPU it is running on. Callbacks also exist for when CPUs are taken offline, so that any per-CPU resources used by the driver can be disabled so they don't get forcefully migrated. CDMM devices are created as children of the CPU device they are attached to. Any existing CDMM configuration by the bootloader will be inherited, however platforms wishing to enable CDMM should implement the weak mips_cdmm_phys_base() function (see asm/cdmm.h) so that the bus driver knows where it should put the CDMM region in the physical address space if the bootloader hasn't already enabled it. A mips_cdmm_early_probe() function is also provided to allow early boot or particularly low level code to set up the CDMM region and probe for a specific device type, for example early console or KGDB IO drivers for the EJTAG Fast Debug Channel (FDC) CDMM device. Signed-off-by: James Hogan Cc: Greg Kroah-Hartman Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/9599/ Signed-off-by: Ralf Baechle --- include/linux/mod_devicetable.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e530533b94be..3bfd56778c29 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -546,6 +546,14 @@ struct amba_id { void *data; }; +/** + * struct mips_cdmm_device_id - identifies devices in MIPS CDMM bus + * @type: Device type identifier. + */ +struct mips_cdmm_device_id { + __u8 type; +}; + /* * Match x86 CPUs for CPU specific drivers. * See documentation of "x86_match_cpu" for details. -- cgit v1.2.3 From 6429e2b6fc05d8640bb94f5b67c047e936707f31 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 29 Jan 2015 11:14:09 +0000 Subject: IRQCHIP: mips-gic: Add function for retrieving FDC IRQ Add a function to the MIPS GIC driver for retrieving the Fast Debug Channel (FDC) interrupt number, similar to the existing ones for the timer and perf counter interrupts. This will be used by platform implementations of get_c0_fdc_int() if a GIC is present. A workaround exists for interAptiv and proAptiv which claim to be able to route the FDC interrupt but don't seem to be able to in practice (at least on Malta). [ralf@linux-mips.org: Fix conflict.] Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/9142/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 2e79b4bb2d75..83099e5ab2c4 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -251,4 +251,5 @@ extern unsigned int plat_ipi_call_int_xlate(unsigned int); extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); +extern int gic_get_c0_fdc_int(void); #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit v1.2.3 From 8fa4b93067b70a87785279a7c60158e58e4f2f20 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 23 Mar 2015 12:32:01 +0000 Subject: IRQCHIP: irq-mips-gic: Add new functions to start/stop the GIC counter We add new functions to start and stop the GIC counter since there are no guarantees the counter will be running after a CPU reset. The GIC counter is stopped by setting the 29th bit on the GIC Config register and it is started by clearing that bit. Signed-off-by: Markos Chandras Cc: Thomas Gleixner Cc: Jason Cooper Cc: Andrew Bresticker Cc: Qais Yousef Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9594/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 83099e5ab2c4..9b1ad3734911 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -246,6 +246,8 @@ extern unsigned int gic_get_count_width(void); extern cycle_t gic_read_compare(void); extern void gic_write_compare(cycle_t cnt); extern void gic_write_cpu_compare(cycle_t cnt, int cpu); +extern void gic_start_count(void); +extern void gic_stop_count(void); extern void gic_send_ipi(unsigned int intr); extern unsigned int plat_ipi_call_int_xlate(unsigned int); extern unsigned int plat_ipi_resched_int_xlate(unsigned int); -- cgit v1.2.3 From 06ca7f68d4c861d549a8deb161e1527065a80bb1 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Mon, 30 Mar 2015 21:55:52 +0200 Subject: crypto: api - prevent helper ciphers from being used Several hardware related cipher implementations are implemented as follows: a "helper" cipher implementation is registered with the kernel crypto API. Such helper ciphers are never intended to be called by normal users. In some cases, calling them via the normal crypto API may even cause failures including kernel crashes. In a normal case, the "wrapping" ciphers that use the helpers ensure that these helpers are invoked such that they cannot cause any calamity. Considering the AF_ALG user space interface, unprivileged users can call all ciphers registered with the crypto API, including these helper ciphers that are not intended to be called directly. That means, with AF_ALG user space may invoke these helper ciphers and may cause undefined states or side effects. To avoid any potential side effects with such helpers, the patch prevents the helpers to be called directly. A new cipher type flag is added: CRYPTO_ALG_INTERNAL. This flag shall be used to mark helper ciphers. These ciphers can only be used if the caller invoke the cipher with CRYPTO_ALG_INTERNAL in the type and mask field. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fb5ef16d6a12..10df5d2d093a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -94,6 +94,12 @@ */ #define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 +/* + * Mark a cipher as a service implementation only usable by another + * cipher and never by a normal user of the kernel crypto API + */ +#define CRYPTO_ALG_INTERNAL 0x00002000 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From 938c470976192590b4adc921b2e10fa31237eddc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2015 13:43:04 +0100 Subject: iommu: Introduce domain_alloc and domain_free iommu_ops These new call-backs defer the allocation and destruction of 'struct iommu_domain' to the iommu driver. This allows drivers to embed this struct into their private domain structures and to get rid of the domain_init and domain_destroy call-backs when all drivers have been converted. Tested-by: Thierry Reding Tested-by: Heiko Stuebner Reviewed-by: Alex Williamson Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 38daa453f2e5..69d1d120c0a3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -115,6 +115,11 @@ struct iommu_ops { bool (*capable)(enum iommu_cap); int (*domain_init)(struct iommu_domain *domain); void (*domain_destroy)(struct iommu_domain *domain); + + /* Domain allocation and freeing by the iommu driver */ + struct iommu_domain *(*domain_alloc)(void); + void (*domain_free)(struct iommu_domain *); + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*map)(struct iommu_domain *domain, unsigned long iova, -- cgit v1.2.3 From 8539c7c16b970258e14761d8a1f7d10fe798031a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2015 13:43:05 +0100 Subject: iommu: Introduce iommu domain types This allows to handle domains differently based on their type in the future. An IOMMU driver can implement certain optimizations for DMA-API domains for example. The domain types can be extended later and some of the existing domain attributes can be migrated to become domain flags. Tested-by: Thierry Reding Tested-by: Heiko Stuebner Reviewed-by: Alex Williamson Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 69d1d120c0a3..72d03fe78cf6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -51,7 +51,32 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? */ }; +/* Domain feature flags */ +#define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ +#define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API + implementation */ +#define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ + +/* + * This are the possible domain-types + * + * IOMMU_DOMAIN_BLOCKED - All DMA is blocked, can be used to isolate + * devices + * IOMMU_DOMAIN_IDENTITY - DMA addresses are system physical addresses + * IOMMU_DOMAIN_UNMANAGED - DMA mappings managed by IOMMU-API user, used + * for VMs + * IOMMU_DOMAIN_DMA - Internally used for DMA-API implementations. + * This flag allows IOMMU drivers to implement + * certain optimizations for these domains + */ +#define IOMMU_DOMAIN_BLOCKED (0U) +#define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) +#define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING) +#define IOMMU_DOMAIN_DMA (__IOMMU_DOMAIN_PAGING | \ + __IOMMU_DOMAIN_DMA_API) + struct iommu_domain { + unsigned type; const struct iommu_ops *ops; void *priv; iommu_fault_handler_t handler; @@ -117,7 +142,7 @@ struct iommu_ops { void (*domain_destroy)(struct iommu_domain *domain); /* Domain allocation and freeing by the iommu driver */ - struct iommu_domain *(*domain_alloc)(void); + struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); void (*domain_free)(struct iommu_domain *); int (*attach_dev)(struct iommu_domain *domain, struct device *dev); -- cgit v1.2.3 From 89be34a1ced886880a3219f9d2ba2192dc738ef2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Mar 2015 13:43:19 +0100 Subject: iommu: Remove domain_init and domain_free iommu_ops All drivers have been converted to the new domain_alloc and domain_free iommu-ops. So remove the old ones and get rid of iommu_domain->priv too, as this is no longer needed when the struct iommu_domain is embedded in the private structures of the iommu drivers. Tested-by: Thierry Reding Tested-by: Heiko Stuebner Reviewed-by: Alex Williamson Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 72d03fe78cf6..0546b8710ce3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -78,7 +78,6 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; const struct iommu_ops *ops; - void *priv; iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; @@ -138,8 +137,6 @@ enum iommu_attr { */ struct iommu_ops { bool (*capable)(enum iommu_cap); - int (*domain_init)(struct iommu_domain *domain); - void (*domain_destroy)(struct iommu_domain *domain); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); -- cgit v1.2.3 From dbbc14775ae395a50d91f22904670ca4c9297542 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Mar 2015 14:33:33 -0400 Subject: SUNRPC: Introduce missing well-known netids Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/msg_prot.h | 8 +++++++- include/linux/sunrpc/xprtrdma.h | 5 ----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..807371357160 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -142,12 +142,18 @@ typedef __be32 rpc_fraghdr; (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) /* - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + * Well-known netids. See: + * + * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_RDMA "rdma" +#define RPCBIND_NETID_SCTP "sctp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_RDMA6 "rdma6" +#define RPCBIND_NETID_SCTP6 "sctp6" #define RPCBIND_NETID_LOCAL "local" /* diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 64a0a0a97b23..c984c85981ea 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -40,11 +40,6 @@ #ifndef _LINUX_SUNRPC_XPRTRDMA_H #define _LINUX_SUNRPC_XPRTRDMA_H -/* - * rpcbind (v3+) RDMA netid. - */ -#define RPCBIND_NETID_RDMA "rdma" - /* * Constants. Max RPC/NFS header is big enough to account for * additional marshaling buffers passed down by Linux client. -- cgit v1.2.3 From 5c935165da79644df90a647ecc140fb77b40dee5 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Mon, 9 Mar 2015 16:18:21 -0700 Subject: mmc: dw_mmc: Add a timeout for sending CMD11 In the Designware databook's description of the "Voltage Switch Normal Scenario" it instructs us to set a timer and fail the voltage change if we don't see the voltage change interrupt within 2ms. Let's implement that. Without implementing this I have often been able to reproduce a hang while trying to send CMD11 on an rk3288-based board while constantly ejecting and inserting UHS cards. Signed-off-by: Doug Anderson Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 471fb3116dbe..9efc567e3ced 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -202,6 +202,8 @@ struct dw_mci { int irq; int sdio_id0; + + struct timer_list cmd11_timer; }; /* DMA ops for Internal/External DMAC interface */ -- cgit v1.2.3 From 92f1719407b90475b3be0b7b9c983dec2ff8351e Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:11:51 +0200 Subject: ptp: introduce get/set time methods with explicit 64 bit seconds. Converting the PHC drivers over to the new methods is one step along the way to making them ready for 2038. Once all the drivers are up to date, then the old methods will be removed. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d8ff3fb84ba..7d6f8e66396f 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,18 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. + * @gettime: Reads the current time from the hardware clock. (deprecated) * parameter ts: Holds the result. * - * @settime: Set the current time on the hardware clock. + * @settime: Set the current time on the hardware clock. (deprecated) * parameter ts: Time value to set. * + * @gettime64: Reads the current time from the hardware clock. + * parameter ts: Holds the result. + * + * @settime64: Set the current time on the hardware clock. + * parameter ts: Time value to set. + * * @enable: Request driver to enable or disable an ancillary feature. * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. @@ -106,6 +112,8 @@ struct ptp_clock_info { int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); + int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, -- cgit v1.2.3 From ed7c6317bc599502e1fdc7f5f95cb9a5550360a4 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:12:13 +0200 Subject: ptp: remove 32 bit get/set methods. All of the PHC drivers have been converted to the new methods. This patch converts the three remaining callers within the core code and removes the older methods for good. As a result, the core PHC code is ready for the year 2038. However, some of the PHC drivers are not quite ready yet. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 7d6f8e66396f..b8b73066d137 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,6 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. (deprecated) - * parameter ts: Holds the result. - * - * @settime: Set the current time on the hardware clock. (deprecated) - * parameter ts: Time value to set. - * * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * @@ -110,8 +104,6 @@ struct ptp_clock_info { struct ptp_pin_desc *pin_config; int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); - int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); - int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, -- cgit v1.2.3 From 52b09914af86fa3e728175c1125c91520e437b2f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 23 Feb 2015 16:36:41 -0500 Subject: dm: remove unnecessary wrapper around blk_lld_busy There is no need for DM to export a wrapper around the already exported blk_lld_busy(). Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index fd23978d93fe..51cc1deb7af3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -605,9 +605,4 @@ static inline unsigned long to_bytes(sector_t n) return (n << SECTOR_SHIFT); } -/*----------------------------------------------------------------- - * Helper for block layer and dm core operations - *---------------------------------------------------------------*/ -int dm_underlying_device_busy(struct request_queue *q); - #endif /* _LINUX_DEVICE_MAPPER_H */ -- cgit v1.2.3 From 930345ea630405aa6e6f42efcb149c3f360a6b67 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:25 +0200 Subject: netlink: implement nla_put_in_addr and nla_put_in6_addr IP addresses are often stored in netlink attributes. Add generic functions to do that. For nla_put_in_addr, it would be nicer to pass struct in_addr but this is not used universally throughout the kernel, in way too many places __be32 is used to store IPv4 address. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/netfilter/ipset/ip_set.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f1606fa6132d..34b172301558 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -483,7 +483,7 @@ static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr) if (!__nested) return -EMSGSIZE; - ret = nla_put_net32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); + ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); if (!ret) ipset_nest_end(skb, __nested); return ret; @@ -497,8 +497,7 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, if (!__nested) return -EMSGSIZE; - ret = nla_put(skb, IPSET_ATTR_IPADDR_IPV6, - sizeof(struct in6_addr), ipaddrptr); + ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr); if (!ret) ipset_nest_end(skb, __nested); return ret; -- cgit v1.2.3 From ffa88f37ffeaac398be68f9678b0e6046a5ba7f6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 30 Mar 2015 17:45:22 +0300 Subject: net/mlx4_en: Move statistics bitmap setting to the Ethernet driver The statistics bitmap belongs to the Ethernet driver, move it there. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4550c67b92e4..49abbe28e230 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1300,7 +1300,6 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); -void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, -- cgit v1.2.3 From 0b131561a7d639abb0a194d2d8fae839ce3b99e9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 30 Mar 2015 17:45:25 +0300 Subject: net/mlx4_en: Add Flow control statistics display via ethtool Flow control per priority and Global pause counters are now visible via ethtool. The counters shows statistics regarding pauses in the device. Signed-off-by: Matan Barak Signed-off-by: Shani Michaeli Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 49abbe28e230..ab7ebec943b8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, - MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 }; enum { -- cgit v1.2.3 From caa0e2d0e331a04cbc1cb9bca3169c1d94b80838 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 1 Apr 2015 08:21:51 +1030 Subject: virtio_config: reorder functions This simply reorders functions in virtio_config so width access wrapper helpers are all together. Drops an extra empty line while we are at it. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ca3ed78e5ec7..22d33034b578 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -298,13 +298,6 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) } \ } while(0) -static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset) -{ - u8 ret; - vdev->config->get(vdev, offset, &ret, sizeof(ret)); - return ret; -} - /* Read @count fields, @bytes each. */ static inline void __virtio_cread_many(struct virtio_device *vdev, unsigned int offset, @@ -326,7 +319,6 @@ static inline void __virtio_cread_many(struct virtio_device *vdev, } while (gen != old); } - static inline void virtio_cread_bytes(struct virtio_device *vdev, unsigned int offset, void *buf, size_t len) @@ -334,6 +326,13 @@ static inline void virtio_cread_bytes(struct virtio_device *vdev, __virtio_cread_many(vdev, offset, buf, len, 1); } +static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset) +{ + u8 ret; + vdev->config->get(vdev, offset, &ret, sizeof(ret)); + return ret; +} + static inline void virtio_cwrite8(struct virtio_device *vdev, unsigned int offset, u8 val) { -- cgit v1.2.3 From 012665391dfe12bf8a88d1000e627be012c39dbf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 1 Apr 2015 13:31:20 +1030 Subject: virtio: drop a useless config read "virtio: core support for config generation" fixed reading up 64 bit values, adding generation checks for such reads. By mistake, it left an explicit get call in place as well. the result is that the value is read twice, the first result is discarded. Not a big deal since this only happens with virtio blk and only on boot ATM, so performance isn't affected, but let's clean it up. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 22d33034b578..1e306f727edc 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -373,7 +373,6 @@ static inline u64 virtio_cread64(struct virtio_device *vdev, unsigned int offset) { u64 ret; - vdev->config->get(vdev, offset, &ret, sizeof(ret)); __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret)); return virtio64_to_cpu(vdev, (__force __virtio64)ret); } -- cgit v1.2.3 From fed6cefe3b6e862dcc74d07324478caa07e84eaf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 5 Feb 2015 11:44:41 +0100 Subject: x86/efi: Add a "debug" option to the efi= cmdline ... and hide the memory regions dump behind it. Make it default-off. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20141209095843.GA3990@pd.tnic Acked-by: Laszlo Ersek Acked-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index cf7e431cbc73..af5be0368dec 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -942,6 +942,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_64BIT 5 /* Is the firmware 64-bit? */ #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ +#define EFI_DBG 8 /* Print additional debug info at runtime */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From 9f083b74df3a7eaa100b456f2dc195512daf728e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:05:19 +0100 Subject: clockevents: Remove CONFIG_GENERIC_CLOCKEVENTS_BUILD This option was for simpler migration to the clock events code. Most architectures have been converted and the option has been disfunctional as a standalone option for quite some time. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5021859.jl9OC1medj@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e20232c3320a..57975131dac1 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -22,7 +22,7 @@ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_CPU_DEAD, }; -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS #include #include @@ -229,13 +229,9 @@ static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) {}; #endif -#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int clockevents_notify(unsigned long reason, void *arg); -#else -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } -#endif -#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ +#else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} @@ -243,6 +239,7 @@ static inline void clockevents_resume(void) {} static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) {}; +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } #endif -- cgit v1.2.3 From c1797baf6880174f899ce3960d0598f5bbeeb7ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:07:37 +0100 Subject: tick: Move core only declarations and functions to core No point to expose everything to the world. People just believe such functions can be abused for whatever purposes. Sigh. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5 ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/28017337.VbCUc39Gme@vostro.rjw.lan [ Merged to latest timers/core ] Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 15 ++--- include/linux/tick.h | 134 ++++++--------------------------------------- 2 files changed, 24 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 57975131dac1..bc3af821350b 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -193,15 +193,6 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); -extern void clockevents_exchange_device(struct clock_event_device *old, - struct clock_event_device *new); -extern void clockevents_set_state(struct clock_event_device *dev, - enum clock_event_state state); -extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, bool force); - -extern void clockevents_handle_noop(struct clock_event_device *dev); - static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) { @@ -209,6 +200,12 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) freq, minsec); } +/* Should be core only, but is abused by arm bl_switcher */ +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); + extern void clockevents_suspend(void); extern void clockevents_resume(void); diff --git a/include/linux/tick.h b/include/linux/tick.h index 9c085dc12ae9..f9a2d2687a46 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -1,7 +1,5 @@ -/* linux/include/linux/tick.h - * - * This file contains the structure definitions for tick related functions - * +/* + * Tick related global functions */ #ifndef _LINUX_TICK_H #define _LINUX_TICK_H @@ -9,13 +7,12 @@ #include #include #include -#include #include #include #include +/* ARM BL switcher abuse support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS - enum tick_device_mode { TICKDEV_MODE_PERIODIC, TICKDEV_MODE_ONESHOT, @@ -25,133 +22,38 @@ struct tick_device { struct clock_event_device *evtdev; enum tick_device_mode mode; }; - -enum tick_nohz_mode { - NOHZ_MODE_INACTIVE, - NOHZ_MODE_LOWRES, - NOHZ_MODE_HIGHRES, -}; - -/** - * struct tick_sched - sched tick emulation and no idle tick control/stats - * @sched_timer: hrtimer to schedule the periodic tick in high - * resolution mode - * @last_tick: Store the last tick expiry time when the tick - * timer is modified for nohz sleeps. This is necessary - * to resume the tick timer operation in the timeline - * when the CPU returns from nohz sleep. - * @tick_stopped: Indicator that the idle tick has been stopped - * @idle_jiffies: jiffies at the entry to idle for idle time accounting - * @idle_calls: Total number of idle calls - * @idle_sleeps: Number of idle calls, where the sched tick was stopped - * @idle_entrytime: Time when the idle call was entered - * @idle_waketime: Time when the idle was interrupted - * @idle_exittime: Time when the idle state was left - * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped - * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding - * @sleep_length: Duration of the current idle sleep - * @do_timer_lst: CPU was the last one doing do_timer before going idle - */ -struct tick_sched { - struct hrtimer sched_timer; - unsigned long check_clocks; - enum tick_nohz_mode nohz_mode; - ktime_t last_tick; - int inidle; - int tick_stopped; - unsigned long idle_jiffies; - unsigned long idle_calls; - unsigned long idle_sleeps; - int idle_active; - ktime_t idle_entrytime; - ktime_t idle_waketime; - ktime_t idle_exittime; - ktime_t idle_sleeptime; - ktime_t iowait_sleeptime; - ktime_t sleep_length; - unsigned long last_jiffies; - unsigned long next_jiffies; - ktime_t idle_expires; - int do_timer_last; -}; - -extern void __init tick_init(void); -extern int tick_is_oneshot_available(void); extern struct tick_device *tick_get_device(int cpu); +#endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS +extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); +#else /* CONFIG_GENERIC_CLOCKEVENTS */ +static inline void tick_init(void) { } +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_HIGH_RES_TIMERS -extern int tick_init_highres(void); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_setup_sched_timer(void); -# endif - -# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS -extern void tick_cancel_sched_timer(int cpu); -# else -static inline void tick_cancel_sched_timer(int cpu) { } -# endif - -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern struct tick_device *tick_get_broadcast_device(void); -extern struct cpumask *tick_get_broadcast_mask(void); - -# ifdef CONFIG_TICK_ONESHOT -extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -# endif - -# endif /* BROADCAST */ - -# ifdef CONFIG_TICK_ONESHOT -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern struct tick_sched *tick_get_tick_sched(int cpu); +#ifdef CONFIG_TICK_ONESHOT extern void tick_irq_enter(void); -extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu # define arch_needs_cpu() (0) # endif # else -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -# endif - -#else /* CONFIG_GENERIC_CLOCKEVENTS */ -static inline void tick_init(void) { } -static inline void tick_freeze(void) { } -static inline void tick_unfreeze(void) { } -static inline void tick_cancel_sched_timer(int cpu) { } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ - -# ifdef CONFIG_NO_HZ_COMMON -DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); - -static inline int tick_nohz_tick_stopped(void) -{ - return __this_cpu_read(tick_cpu_sched.tick_stopped); -} +#endif +#ifdef CONFIG_NO_HZ_COMMON +extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -# else /* !CONFIG_NO_HZ_COMMON */ -static inline int tick_nohz_tick_stopped(void) -{ - return 0; -} - +#else /* !CONFIG_NO_HZ_COMMON */ +static inline int tick_nohz_tick_stopped(void) { return 0; } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } @@ -163,7 +65,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !CONFIG_NO_HZ_COMMON */ +#endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -- cgit v1.2.3 From 4ffee521f36390c7720d493591b764ca35c8030b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:16 +0100 Subject: clockevents: Make suspend/resume calls explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the suspend/resume() calls and invoke them directly from the call sites. No locking required at this point because these calls happen with interrupts disabled and a single cpu online. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5. ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/713674030.jVm1qaHuPf@vostro.rjw.lan [ Rebased on top of latest timers/core. ] Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 2 -- include/linux/tick.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index bc3af821350b..50ce9750754f 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -16,8 +16,6 @@ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_BROADCAST_FORCE, CLOCK_EVT_NOTIFY_BROADCAST_ENTER, CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - CLOCK_EVT_NOTIFY_SUSPEND, - CLOCK_EVT_NOTIFY_RESUME, CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/include/linux/tick.h b/include/linux/tick.h index f9a2d2687a46..7e07e0e3d898 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -29,10 +29,13 @@ extern struct tick_device *tick_get_device(int cpu); extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); +/* Should be core only, but XEN resume magic abuses this interface */ +extern void tick_resume(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } +static inline void tick_resume(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT -- cgit v1.2.3 From f46481d0a7cb942b84145acb80ad43bdb1ff8eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:04 +0100 Subject: tick/xen: Provide and use tick_suspend_local() and tick_resume_local() Xen calls on every cpu into tick_resume() which is just wrong. tick_resume() is for the syscore global suspend/resume invocation. What XEN really wants is a per cpu local resume function. Provide a tick_resume_local() function and use it in XEN. Also provide a complementary tick_suspend_local() and modify tick_unfreeze() and tick_freeze(), respectively, to use the new local tick resume/suspend functions. Signed-off-by: Thomas Gleixner [ Combined two patches, rebased, modified subject/changelog. ] Signed-off-by: Rafael J. Wysocki Cc: Boris Ostrovsky Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1698741.eezk9tnXtG@vostro.rjw.lan [ Merged to latest timers/core. ] Signed-off-by: Ingo Molnar --- include/linux/tick.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7e07e0e3d898..a3d4d2840e7f 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -29,13 +29,13 @@ extern struct tick_device *tick_get_device(int cpu); extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); -/* Should be core only, but XEN resume magic abuses this interface */ -extern void tick_resume(void); +/* Should be core only, but XEN resume magic requires this */ +extern void tick_resume_local(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } -static inline void tick_resume(void) { } +static inline void tick_resume_local(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT -- cgit v1.2.3 From 7270d11c56f594af4d166b2988421cd8ed933dc1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:52 +0100 Subject: arm/bL_switcher: Kill tick suspend hackery Use the new tick_suspend/resume_local() and get rid of the homebrewn implementation of these in the ARM bL switcher. The check for the cpumask is completely pointless. There is no harm to suspend a per cpu tick device unconditionally. If that's a real issue then we fix it proper at the core level and not with some completely undocumented hacks in some random core code. Move the tick internals to the core code, now that this nuisance is gone. Signed-off-by: Thomas Gleixner [ rjw: Rebase, changelog ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Russell King Link: http://lkml.kernel.org/r/1655112.Ws17YsMfN7@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 6 ------ include/linux/tick.h | 19 ++++--------------- 2 files changed, 4 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 50ce9750754f..3ac7e2d90374 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -198,12 +198,6 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) freq, minsec); } -/* Should be core only, but is abused by arm bl_switcher */ -extern void clockevents_set_state(struct clock_event_device *dev, - enum clock_event_state state); -extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, bool force); - extern void clockevents_suspend(void); extern void clockevents_resume(void); diff --git a/include/linux/tick.h b/include/linux/tick.h index a3d4d2840e7f..589868b09aff 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -11,30 +11,19 @@ #include #include -/* ARM BL switcher abuse support */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS -enum tick_device_mode { - TICKDEV_MODE_PERIODIC, - TICKDEV_MODE_ONESHOT, -}; - -struct tick_device { - struct clock_event_device *evtdev; - enum tick_device_mode mode; -}; -extern struct tick_device *tick_get_device(int cpu); -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); extern void tick_freeze(void); extern void tick_unfreeze(void); -/* Should be core only, but XEN resume magic requires this */ +/* Should be core only, but ARM BL switcher requires it */ +extern void tick_suspend_local(void); +/* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } +static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -- cgit v1.2.3 From 138173d4e826587da66c7d321da1a91283222536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 1 Dec 2014 07:58:18 +0100 Subject: MIPS: BCM47xx: Move NVRAM header to the include/linux/. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are two reasons for having this header in the common place: 1) Simplifying drivers that read NVRAM entries. We will be able to safely call bcm47xx_nvram_* functions without #ifdef-s. 2) Getting NVRAM driver out of MIPS arch code. This is needed to support BCM5301X arch which also requires this NVRAM driver. Patch for that will follow once we get is reviewed. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Cc: Arnd Bergmann Cc: Paul Walmsley Cc: linux-soc@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/8619/ Signed-off-by: Ralf Baechle --- include/linux/bcm47xx_nvram.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/bcm47xx_nvram.h (limited to 'include/linux') diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h new file mode 100644 index 000000000000..b12b07e75929 --- /dev/null +++ b/include/linux/bcm47xx_nvram.h @@ -0,0 +1,34 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __BCM47XX_NVRAM_H +#define __BCM47XX_NVRAM_H + +#include +#include + +#ifdef CONFIG_BCM47XX +int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); +int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); +int bcm47xx_nvram_gpio_pin(const char *name); +#else +static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) +{ + return -ENOTSUPP; +}; +static inline int bcm47xx_nvram_getenv(const char *name, char *val, + size_t val_len) +{ + return -ENOTSUPP; +}; +static inline int bcm47xx_nvram_gpio_pin(const char *name) +{ + return -ENOTSUPP; +}; +#endif + +#endif /* __BCM47XX_NVRAM_H */ -- cgit v1.2.3 From 6261b06de565baafa590e58a531a1a5522cea0b6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 24 Mar 2015 18:56:05 -0700 Subject: regulator: Defer lookup of supply to regulator_get Instead of resolving regulator supplies during registration move this to the time of a consumer retrieving a handle. The benefit is that it's possible for one driver to register regulators with internal dependencies out of order. Signed-off-by: Bjorn Andersson Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..6d9fcd0c33d6 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -367,6 +367,7 @@ struct regulator_dev { struct device dev; struct regulation_constraints *constraints; struct regulator *supply; /* for tree */ + const char *supply_name; struct regmap *regmap; struct delayed_work disable_work; -- cgit v1.2.3 From 9eed56e889d8a0bb7870e1216d8d4326dd63ec50 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Apr 2015 11:26:23 +0200 Subject: clockevents: Clean up clockchips.h Do various cleanups on the clockchips.h file: - indent preprocessor blocks to make it more clear which block we are in, this also makes merge resolution easier - comment larger preprocessor blocks consistently, using the: #if FOO ... #else /* !FOO: */ ... #endif /* !FOO */ notation. - unbreak lines - etc. No change in functionality. Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 87 ++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3ac7e2d90374..caeca76d7b32 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -22,17 +22,17 @@ enum clock_event_nofitiers { #ifdef CONFIG_GENERIC_CLOCKEVENTS -#include -#include -#include -#include +# include +# include +# include +# include struct clock_event_device; struct module; /* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED = 0, + CLOCK_EVT_MODE_UNUSED, CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, @@ -51,7 +51,7 @@ enum clock_event_mode { * from DETACHED or SHUTDOWN. */ enum clock_event_state { - CLOCK_EVT_STATE_DETACHED = 0, + CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, @@ -60,28 +60,29 @@ enum clock_event_state { /* * Clock event features */ -#define CLOCK_EVT_FEAT_PERIODIC 0x000001 -#define CLOCK_EVT_FEAT_ONESHOT 0x000002 -#define CLOCK_EVT_FEAT_KTIME 0x000004 +# define CLOCK_EVT_FEAT_PERIODIC 0x000001 +# define CLOCK_EVT_FEAT_ONESHOT 0x000002 +# define CLOCK_EVT_FEAT_KTIME 0x000004 + /* - * x86(64) specific misfeatures: + * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ -#define CLOCK_EVT_FEAT_C3STOP 0x000008 -#define CLOCK_EVT_FEAT_DUMMY 0x000010 +# define CLOCK_EVT_FEAT_C3STOP 0x000008 +# define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ -#define CLOCK_EVT_FEAT_DYNIRQ 0x000020 -#define CLOCK_EVT_FEAT_PERCPU 0x000040 +# define CLOCK_EVT_FEAT_DYNIRQ 0x000020 +# define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ -#define CLOCK_EVT_FEAT_HRTIMER 0x000080 +# define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor @@ -116,10 +117,8 @@ enum clock_event_state { */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - int (*set_next_ktime)(ktime_t expires, - struct clock_event_device *); + int (*set_next_event)(unsigned long evt, struct clock_event_device *); + int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; @@ -136,8 +135,7 @@ struct clock_event_device { * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). */ - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); + void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); @@ -169,18 +167,18 @@ struct clock_event_device { * * factor = (clock_ticks << shift) / nanoseconds */ -static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, - int shift) +static inline unsigned long +div_sc(unsigned long ticks, unsigned long nsec, int shift) { - uint64_t tmp = ((uint64_t)ticks) << shift; + u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); + return (unsigned long) tmp; } /* Clock event layer functions */ -extern u64 clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt); +extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); @@ -194,42 +192,39 @@ extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, - freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); -#else -#define tick_broadcast NULL -#endif +# else +# define tick_broadcast NULL +# endif extern int tick_receive_broadcast(void); -#endif +# endif -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); -#else +# else static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; -#endif +static inline void tick_setup_hrtimer_broadcast(void) { } +# endif extern int clockevents_notify(unsigned long reason, void *arg); -#else /* CONFIG_GENERIC_CLOCKEVENTS */ - -static inline void clockevents_suspend(void) {} -static inline void clockevents_resume(void) {} +#else /* !CONFIG_GENERIC_CLOCKEVENTS: */ +static inline void clockevents_suspend(void) { } +static inline void clockevents_resume(void) { } static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } +static inline void tick_setup_hrtimer_broadcast(void) { } -#endif +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -#endif +#endif /* _LINUX_CLOCKCHIPS_H */ -- cgit v1.2.3 From 4e537f7fbdce5e8ae7c33ebaa8a1956c7727d5a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 Mar 2015 12:49:18 -0700 Subject: bpf: Make internal bpf API independent of CONFIG_BPF_SYSCALL #ifdefs Socket filter code and other subsystems with upcoming eBPF support should not need to deal with the fact that we have CONFIG_BPF_SYSCALL defined or not. Having the bpf syscall as a config option is a nice thing and I'd expect it to stay that way for expert users (I presume one day the default setting of it might change, though), but code making use of it should not care if it's actually enabled or not. Instead, hide this via header files and let the rest deal with it. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Cc: Arnaldo Carvalho de Melo Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1427312966-8434-2-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/linux/bpf.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbfceb756452..c2e21113ecc0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -113,8 +113,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -void bpf_register_prog_type(struct bpf_prog_type_list *tl); - struct bpf_prog; struct bpf_prog_aux { @@ -129,11 +127,25 @@ struct bpf_prog_aux { }; #ifdef CONFIG_BPF_SYSCALL +void bpf_register_prog_type(struct bpf_prog_type_list *tl); + void bpf_prog_put(struct bpf_prog *prog); +struct bpf_prog *bpf_prog_get(u32 ufd); #else -static inline void bpf_prog_put(struct bpf_prog *prog) {} +static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +{ +} + +static inline struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void bpf_prog_put(struct bpf_prog *prog) +{ +} #endif -struct bpf_prog *bpf_prog_get(u32 ufd); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); -- cgit v1.2.3 From 72cbbc8994242b5b43753738c01bf07bf29cb70d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 25 Mar 2015 12:49:19 -0700 Subject: tracing: Add kprobe flag add TRACE_EVENT_FL_KPROBE flag to differentiate kprobe type of tracepoints, since bpf programs can only be attached to kprobe type of PERF_TYPE_TRACEPOINT perf events. Signed-off-by: Alexei Starovoitov Reviewed-by: Steven Rostedt Reviewed-by: Masami Hiramatsu Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1427312966-8434-3-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c674ee8f7fca..77325e1a1816 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,6 +252,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, + TRACE_EVENT_FL_KPROBE_BIT, }; /* @@ -265,6 +266,7 @@ enum { * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint + * KPROBE - Event is a kprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -274,6 +276,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), + TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), }; struct ftrace_event_call { -- cgit v1.2.3 From 2541517c32be2531e0da59dfd7efc1ce844644f5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 25 Mar 2015 12:49:20 -0700 Subject: tracing, perf: Implement BPF programs attached to kprobes BPF programs, attached to kprobes, provide a safe way to execute user-defined BPF byte-code programs without being able to crash or hang the kernel in any way. The BPF engine makes sure that such programs have a finite execution time and that they cannot break out of their sandbox. The user interface is to attach to a kprobe via the perf syscall: struct perf_event_attr attr = { .type = PERF_TYPE_TRACEPOINT, .config = event_id, ... }; event_fd = perf_event_open(&attr,...); ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); 'prog_fd' is a file descriptor associated with BPF program previously loaded. 'event_id' is an ID of the kprobe created. Closing 'event_fd': close(event_fd); ... automatically detaches BPF program from it. BPF programs can call in-kernel helper functions to: - lookup/update/delete elements in maps - probe_read - wraper of probe_kernel_read() used to access any kernel data structures BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is architecture dependent) and return 0 to ignore the event and 1 to store kprobe event into the ring buffer. Note, kprobes are a fundamentally _not_ a stable kernel ABI, so BPF programs attached to kprobes must be recompiled for every kernel version and user must supply correct LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call. Signed-off-by: Alexei Starovoitov Reviewed-by: Steven Rostedt Reviewed-by: Masami Hiramatsu Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 77325e1a1816..0aa535bc9f05 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -13,6 +13,7 @@ struct trace_array; struct trace_buffer; struct tracer; struct dentry; +struct bpf_prog; struct trace_print_flags { unsigned long mask; @@ -306,6 +307,7 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; int (*perf_perm)(struct ftrace_event_call *, struct perf_event *); @@ -551,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file, event_triggers_post_call(file, tt); } +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, -- cgit v1.2.3 From 345527b1edce8df719e0884500c76832a18211c3 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 30 Mar 2015 14:59:19 +0530 Subject: clockevents: Fix cpu_down() race for hrtimer based broadcasting It was found when doing a hotplug stress test on POWER, that the machine either hit softlockups or rcu_sched stall warnings. The issue was traced to commit: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") which exposed the cpu_down() race with hrtimer based broadcast mode: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") The race is the following: Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before it is taken down. CPU0 CPU1 cpu_down() take_cpu_down() disable_interrupts() cpu_die() while (CPU1 != CPU_DEAD) { msleep(100); switch_to_idle(); stop_cpu_timer(); schedule_broadcast(); } tick_cleanup_cpu_dead() take_over_broadcast() So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer anymore, so CPU0 will be stuck forever. Fix this by explicitly taking over broadcast duty before cpu_die(). This is a temporary workaround. What we really want is a callback in the clockevent device which allows us to do that from the dying CPU by pushing the hrtimer onto a different cpu. That might involve an IPI and is definitely more complex than this immediate fix. Changelog was picked up from: https://lkml.org/lkml/2015/2/16/213 Suggested-by: Thomas Gleixner Tested-by: Nicolas Pitre Signed-off-by: Preeti U. Murthy Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: nicolas.pitre@linaro.org Cc: peterz@infradead.org Cc: rjw@rjwysocki.net Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com [ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ] Signed-off-by: Ingo Molnar --- include/linux/tick.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 589868b09aff..f9ff225d53c0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -36,6 +36,12 @@ extern void tick_irq_enter(void); static inline void tick_irq_enter(void) { } #endif +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); +#else +static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } +#endif + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); -- cgit v1.2.3 From 30ceec6284129662efc3a1e7675b2bd857a046fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:27 +0000 Subject: FS-Cache: When submitting an op, cancel it if the target object is dying When submitting an operation, prefer to cancel the operation immediately rather than queuing it for later processing if the object is marked as dying (ie. the object state machine has reached the KILL_OBJECT state). Whilst we're at it, change the series of related test_bit() calls into a READ_ONCE() and bitwise-AND operators to reduce the number of load instructions (test_bit() has a volatile address). Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index c9dafdaf3347..2e83a141e465 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -411,17 +411,22 @@ static inline bool fscache_object_is_available(struct fscache_object *object) return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); } +static inline bool fscache_cache_is_broken(struct fscache_object *object) +{ + return test_bit(FSCACHE_IOERROR, &object->cache->flags); +} + static inline bool fscache_object_is_active(struct fscache_object *object) { return fscache_object_is_available(object) && fscache_object_is_live(object) && - !test_bit(FSCACHE_IOERROR, &object->cache->flags); + !fscache_cache_is_broken(object); } static inline bool fscache_object_is_dead(struct fscache_object *object) { return fscache_object_is_dying(object) && - test_bit(FSCACHE_IOERROR, &object->cache->flags); + fscache_cache_is_broken(object); } /** -- cgit v1.2.3 From 87021526300f1a292dd966e141e183630ac95317 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:52:51 +0000 Subject: FS-Cache: fscache_object_is_dead() has wrong logic, kill it fscache_object_is_dead() returns true only if the object is marked dead and the cache got an I/O error. This should be a logical OR instead. Since two of the callers got split up into handling for separate subcases, expand the other callers and kill the function. This is probably the right thing to do anyway since one of the subcases isn't about the object at all, but rather about the cache. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 2e83a141e465..ca3d550da11e 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -423,12 +423,6 @@ static inline bool fscache_object_is_active(struct fscache_object *object) !fscache_cache_is_broken(object); } -static inline bool fscache_object_is_dead(struct fscache_object *object) -{ - return fscache_object_is_dying(object) && - fscache_cache_is_broken(object); -} - /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came -- cgit v1.2.3 From 1339ec98e32b4bc8efb6fbb71c006a465130aaba Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 13:26:39 +0000 Subject: FS-Cache: Out of line fscache_operation_init() Out of line fscache_operation_init() so that it can access internal FS-Cache features, such as stats, in a later commit. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ca3d550da11e..0e26d49972e3 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -119,27 +119,9 @@ extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); - -/** - * fscache_operation_init - Do basic initialisation of an operation - * @op: The operation to initialise - * @release: The release function to assign - * - * Do basic initialisation of an operation. The caller must still set flags, - * object and processor if needed. - */ -static inline void fscache_operation_init(struct fscache_operation *op, - fscache_operation_processor_t processor, - fscache_operation_release_t release) -{ - INIT_WORK(&op->work, fscache_op_work_func); - atomic_set(&op->usage, 1); - op->state = FSCACHE_OP_ST_INITIALISED; - op->debug_id = atomic_inc_return(&fscache_op_debug_id); - op->processor = processor; - op->release = release; - INIT_LIST_HEAD(&op->pend_link); -} +extern void fscache_operation_init(struct fscache_operation *, + fscache_operation_processor_t, + fscache_operation_release_t); /* * data read operation -- cgit v1.2.3 From d3b97ca4a99e4e6c78f5a21c968eadf5c8ba9971 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: FS-Cache: The operation cancellation method needs calling in more places Any time an incomplete operation is cancelled, the operation cancellation function needs to be called to clean up. This is currently being passed directly to some of the functions that might want to call it, but not all. Instead, pass the cancellation method pointer to the fscache_operation_init() and have that cache it in the operation struct. Further, plug in a dummy cancellation handler if the caller declines to set one as this allows us to call the function unconditionally (the extra overhead isn't worth bothering about as we don't expect to be calling this typically). The cancellation method must thence be called everywhere the CANCELLED state is set. Note that we call it *before* setting the CANCELLED state such that the method can use the old state value to guide its operation. fscache_do_cancel_retrieval() needs moving higher up in the sources so that the init function can use it now. Without this, the following oops may be seen: FS-Cache: Assertion failed FS-Cache: 3 == 0 is false ------------[ cut here ]------------ kernel BUG at ../fs/fscache/page.c:261! ... RIP: 0010:[] fscache_release_retrieval_op+0x77/0x100 [] fscache_put_operation+0x114/0x2da [] __fscache_read_or_alloc_pages+0x358/0x3b3 [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 The assertion is showing that the remaining number of pages (n_pages) is not 0 when the operation is being released. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 0e26d49972e3..69c6eb10d858 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ extern wait_queue_head_t fscache_cache_cleared_wq; */ typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); enum fscache_operation_state { FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ @@ -109,6 +110,9 @@ struct fscache_operation { * the op in a non-pool thread */ fscache_operation_processor_t processor; + /* Operation cancellation cleanup (optional) */ + fscache_operation_cancel_t cancel; + /* operation releaser */ fscache_operation_release_t release; }; @@ -121,6 +125,7 @@ extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); extern void fscache_operation_init(struct fscache_operation *, fscache_operation_processor_t, + fscache_operation_cancel_t, fscache_operation_release_t); /* -- cgit v1.2.3 From 4a47132ff472a0c2c5441baeb50cf97f2580bc43 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: FS-Cache: Retain the netfs context in the retrieval op earlier Now that the retrieval operation may be disposed of by fscache_put_operation() before we actually set the context, the retrieval-specific cleanup operation can produce a NULL-pointer dereference when it tries to unconditionally clean up the netfs context. Given that it is expected that we'll get at least as far as the place where we currently set the context pointer and it is unlikely we'll go through the error handling paths prior to that point, retain the context right from the point that the retrieval op is allocated. Concomitant to this, we need to retain the cookie pointer in the retrieval op also so that we can call the netfs to release its context in the release method. In addition, we might now get into fscache_release_retrieval_op() with the op only initialised. To this end, set the operation to DEAD only after the release method has been called and skip the n_pages test upon cleanup if the op is still in the INITIALISED state. Without these changes, the following oops might be seen: BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8 ... RIP: 0010:[] fscache_release_retrieval_op+0xae/0x100 ... Call Trace: [] fscache_put_operation+0x117/0x2e0 [] __fscache_read_or_alloc_pages+0x351/0x3ac [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 69c6eb10d858..604e1526cd00 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -133,6 +133,7 @@ extern void fscache_operation_init(struct fscache_operation *, */ struct fscache_retrieval { struct fscache_operation op; + struct fscache_cookie *cookie; /* The netfs cookie */ struct address_space *mapping; /* netfs pages */ fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ void *context; /* netfs read context (pinned) */ -- cgit v1.2.3 From 45bfb2e50471abbbfd83d40d28c986078b0d24ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2015 14:18:11 +0200 Subject: perf: Add AUX area to ring buffer for raw data streams This patch introduces "AUX space" in the perf mmap buffer, intended for exporting high bandwidth data streams to userspace, such as instruction flow traces. AUX space is a ring buffer, defined by aux_{offset,size} fields in the user_page structure, and read/write pointers aux_{head,tail}, which abide by the same rules as data_* counterparts of the main perf buffer. In order to allocate/mmap AUX, userspace needs to set up aux_offset to such an offset that will be greater than data_offset+data_size and aux_size to be the desired buffer size. Both need to be page aligned. Then, same aux_offset and aux_size should be passed to mmap() call and if everything adds up, you should have an AUX buffer as a result. Pages that are mapped into this buffer also come out of user's mlock rlimit plus perf_event_mlock_kb allowance. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 401554074de9..5a94f6d6fa91 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -284,6 +284,18 @@ struct pmu { * Return the count value for a counter. */ u64 (*count) (struct perf_event *event); /*optional*/ + + /* + * Set up pmu-private data structures for an AUX area + */ + void *(*setup_aux) (int cpu, void **pages, + int nr_pages, bool overwrite); + /* optional */ + + /* + * Free pmu-private AUX data structures + */ + void (*free_aux) (void *aux); /* optional */ }; /** @@ -862,6 +874,11 @@ static inline bool needs_branch_stack(struct perf_event *event) return event->attr.branch_sample_type != 0; } +static inline bool has_aux(struct perf_event *event) +{ + return event->pmu->setup_aux; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -- cgit v1.2.3 From 0a4e38e64f5e91ce131cc42ee5bb3925377ec840 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:12 +0200 Subject: perf: Support high-order allocations for AUX space Some pmus (such as BTS or Intel PT without multiple-entry ToPA capability) don't support scatter-gather and will prefer larger contiguous areas for their output regions. This patch adds a new pmu capability to request higher order allocations. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-4-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5a94f6d6fa91..d5a4a8e95808 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -174,6 +174,7 @@ struct perf_event; */ #define PERF_PMU_CAP_NO_INTERRUPT 0x01 #define PERF_PMU_CAP_NO_NMI 0x02 +#define PERF_PMU_CAP_AUX_NO_SG 0x04 /** * struct pmu - generic performance monitoring unit -- cgit v1.2.3 From 6a279230391b63130070e0219b0ad09d34d28c89 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:13 +0200 Subject: perf: Add a capability for AUX_NO_SG pmus to do software double buffering For pmus that don't support scatter-gather for AUX data in hardware, it might still make sense to implement software double buffering to avoid losing data while the user is reading data out. For this purpose, add a pmu capability that guarantees multiple high-order chunks for AUX buffer, so that the pmu driver can do switchover tricks. To make use of this feature, add PERF_PMU_CAP_AUX_SW_DOUBLEBUF to your pmu's capability mask. This will make the ring buffer AUX allocation code ensure that the biggest high order allocation for the aux buffer pages is no bigger than half of the total requested buffer size, thus making sure that the buffer has at least two high order allocations. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-5-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d5a4a8e95808..13a1eb3a2a2d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -175,6 +175,7 @@ struct perf_event; #define PERF_PMU_CAP_NO_INTERRUPT 0x01 #define PERF_PMU_CAP_NO_NMI 0x02 #define PERF_PMU_CAP_AUX_NO_SG 0x04 +#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 /** * struct pmu - generic performance monitoring unit -- cgit v1.2.3 From bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 30 Jan 2015 12:31:06 +0200 Subject: perf: Add a pmu capability for "exclusive" events Usually, pmus that do, for example, instruction tracing, would only ever be able to have one event per task per cpu (or per perf_event_context). For such pmus it makes sense to disallow creating conflicting events early on, so as to provide consistent behavior for the user. This patch adds a pmu capability that indicates such constraint on event creation. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1422613866-113186-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 13a1eb3a2a2d..f936a1e51f29 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -176,6 +176,7 @@ struct perf_event; #define PERF_PMU_CAP_NO_NMI 0x02 #define PERF_PMU_CAP_AUX_NO_SG 0x04 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 +#define PERF_PMU_CAP_EXCLUSIVE 0x10 /** * struct pmu - generic performance monitoring unit @@ -196,6 +197,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; -- cgit v1.2.3 From fdc2670666f40ab3e03143f04d1ebf4a05e2c24a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:16 +0200 Subject: perf: Add API for PMUs to write to the AUX area For pmus that wish to write data to ring buffer's AUX area, provide perf_aux_output_{begin,end}() calls to initiate/commit data writes, similarly to perf_output_{begin,end}. These also use the same output handle structure. Also, similarly to software counterparts, these will direct inherited events' output to parents' ring buffers. After the perf_aux_output_begin() returns successfully, handle->size is set to the maximum amount of data that can be written wrt aux_tail pointer, so that no data that the user hasn't seen will be overwritten, therefore this should always be called before hardware writing is enabled. On success, this will return the pointer to pmu driver's private structure allocated for this aux area by pmu::setup_aux. Same pointer can also be retrieved using perf_get_aux() while hardware writing is enabled. PMU driver should pass the actual amount of data written as a parameter to perf_aux_output_end(). All hardware writes should be completed and visible before this one is called. Additionally, perf_aux_output_skip() will adjust output handle and aux_head in case some part of the buffer has to be skipped over to maintain hardware's alignment constraints. Nested writers are forbidden and guards are in place to catch such attempts. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-8-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f936a1e51f29..45c5873ad9b3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -573,7 +573,10 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; - void *addr; + union { + void *addr; + unsigned long head; + }; int page; }; @@ -608,6 +611,14 @@ perf_cgroup_from_task(struct task_struct *task) #ifdef CONFIG_PERF_EVENTS +extern void *perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event); +extern void perf_aux_output_end(struct perf_output_handle *handle, + unsigned long size, bool truncated); +extern int perf_aux_output_skip(struct perf_output_handle *handle, + unsigned long size); +extern void *perf_get_aux(struct perf_output_handle *handle); + extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -898,6 +909,17 @@ extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); #else /* !CONFIG_PERF_EVENTS: */ +static inline void * +perf_aux_output_begin(struct perf_output_handle *handle, + struct perf_event *event) { return NULL; } +static inline void +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, + bool truncated) { } +static inline int +perf_aux_output_skip(struct perf_output_handle *handle, + unsigned long size) { return -EINVAL; } +static inline void * +perf_get_aux(struct perf_output_handle *handle) { return NULL; } static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } -- cgit v1.2.3 From ec0d7729bbaed4b9d2d3fada693278e13a3d1368 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 14 Jan 2015 14:18:23 +0200 Subject: perf: Add ITRACE_START record to indicate that tracing has started For counters that generate AUX data that is bound to the context of a running task, such as instruction tracing, the decoder needs to know exactly which task is running when the event is first scheduled in, before the first sched_switch. The decoder's need to know this stems from the fact that instruction flow trace decoding will almost always require program's object code in order to reconstruct said flow and for that we need at least its pid/tid in the perf stream. To single out such instruction tracing pmus, this patch introduces ITRACE PMU capability. The reason this is not part of RECORD_AUX record is that not all pmus capable of generating AUX data need this, and the opposite is *probably* also true. While sched_switch covers for most cases, there are two problems with it: the consumer will need to process events out of order (that is, having found RECORD_AUX, it will have to skip forward to the nearest sched_switch to figure out which task it was, then go back to the actual trace to decode it) and it completely misses the case when the tracing is enabled and disabled before sched_switch, for example, via PERF_EVENT_IOC_DISABLE. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kaixu Xia Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Robert Richter Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: kan.liang@intel.com Cc: markus.t.metzger@intel.com Cc: mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1421237903-181015-15-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45c5873ad9b3..61992cf2e977 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,6 +129,9 @@ struct hw_perf_event { struct list_head cqm_groups_entry; struct list_head cqm_group_entry; }; + struct { /* itrace */ + int itrace_started; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* @@ -177,6 +180,7 @@ struct perf_event; #define PERF_PMU_CAP_AUX_NO_SG 0x04 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08 #define PERF_PMU_CAP_EXCLUSIVE 0x10 +#define PERF_PMU_CAP_ITRACE 0x20 /** * struct pmu - generic performance monitoring unit -- cgit v1.2.3 From b3738d29323344da3017a91010530cf3a58590fc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 17 Nov 2014 20:07:03 +0100 Subject: watchdog: Add watchdog enable/disable all functions This patch adds two new functions to enable/disable the watchdog across all CPUs. This will be used by the HT PMU bug workaround code to disable/enable the NMI watchdog across quirk enablement. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: bp@alien8.de Cc: jolsa@redhat.com Cc: kan.liang@intel.com Cc: maria.n.dimakopoulou@gmail.com Cc: Frederic Weisbecker Cc: Don Zickus Cc: Andrew Morton Link: http://lkml.kernel.org/r/1416251225-17721-12-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/linux/watchdog.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 395b70e0eccf..a746bf5216f8 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -137,4 +137,12 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd, extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); +#ifdef CONFIG_HARDLOCKUP_DETECTOR +void watchdog_nmi_disable_all(void); +void watchdog_nmi_enable_all(void); +#else +static inline void watchdog_nmi_disable_all(void) {} +static inline void watchdog_nmi_enable_all(void) {} +#endif + #endif /* ifndef _LINUX_WATCHDOG_H */ -- cgit v1.2.3 From b4a20144e0c0a45431695fa5968ce2ed8c9ce6ca Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 28 Jan 2015 14:29:02 +0100 Subject: gpu: host1x: Export host1x_syncpt_read() This function is used to read the current value of the syncpt and is useful in situations where drivers don't schedule work and wait for the syncpoint to increment. One particular use-case is using the syncpoint as a VBLANK counter. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 464f33814a94..d2ba7d334039 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -135,6 +135,7 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id); u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); +u32 host1x_syncpt_read(struct host1x_syncpt *sp); int host1x_syncpt_incr(struct host1x_syncpt *sp); u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, -- cgit v1.2.3 From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:00 +0200 Subject: dev: introduce dev_get_iflink() The goal of this patch is to prepare the removal of the iflink field. It introduces a new ndo function, which will be implemented by virtual interfaces. There is no functional change into this patch. All readers of iflink field now call dev_get_iflink(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 967bb4c8caf1..788eb7a622ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1030,6 +1030,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. + * int (*ndo_get_iflink)(const struct net_device *dev); + * Called to get the iflink value of this device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1191,6 +1193,7 @@ struct net_device_ops { int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); + int (*ndo_get_iflink)(const struct net_device *dev); }; /** @@ -2149,6 +2152,7 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); +int dev_get_iflink(const struct net_device *dev); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); -- cgit v1.2.3 From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:09 +0200 Subject: net: remove iflink field from struct net_device Now that all users of iflink have the ndo_get_iflink handler available, it's possible to remove this field. By default, dev_get_iflink() returns the ifindex of the interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 788eb7a622ad..846a1f5bc9db 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1538,7 +1538,7 @@ struct net_device { netdev_features_t mpls_features; int ifindex; - int iflink; + int group; struct net_device_stats stats; @@ -1741,7 +1741,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - int group; struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 13e71d69cc7444b7d840bab581dbe831e440fb62 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Thu, 2 Apr 2015 10:11:35 +0200 Subject: nbd: Remove kernel internal header The header is not included anywhere. Remove it and include the private nbd_device struct in nbd.c. Signed-off-by: Markus Pargmann Signed-off-by: Jens Axboe --- include/linux/nbd.h | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 include/linux/nbd.h (limited to 'include/linux') diff --git a/include/linux/nbd.h b/include/linux/nbd.h deleted file mode 100644 index f62f78aef4ac..000000000000 --- a/include/linux/nbd.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * 1999 Copyright (C) Pavel Machek, pavel@ucw.cz. This code is GPL. - * 1999/11/04 Copyright (C) 1999 VMware, Inc. (Regis "HPReg" Duchesne) - * Made nbd_end_request() use the io_request_lock - * 2001 Copyright (C) Steven Whitehouse - * New nbd_end_request() for compatibility with new linux block - * layer code. - * 2003/06/24 Louis D. Langholtz - * Removed unneeded blksize_bits field from nbd_device struct. - * Cleanup PARANOIA usage & code. - * 2004/02/19 Paul Clements - * Removed PARANOIA, plus various cleanup and comments - */ -#ifndef LINUX_NBD_H -#define LINUX_NBD_H - - -#include -#include -#include - -struct request; - -struct nbd_device { - int flags; - int harderror; /* Code of hard error */ - struct socket * sock; /* If == NULL, device is not ready, yet */ - int magic; - - spinlock_t queue_lock; - struct list_head queue_head; /* Requests waiting result */ - struct request *active_req; - wait_queue_head_t active_wq; - struct list_head waiting_queue; /* Requests to be sent */ - wait_queue_head_t waiting_wq; - - struct mutex tx_lock; - struct gendisk *disk; - int blksize; - u64 bytesize; - pid_t pid; /* pid of nbd-client, if attached */ - int xmit_timeout; - int disconnect; /* a disconnect has been requested by user */ -}; - -#endif -- cgit v1.2.3 From 802f42a8d99254b8602bf65cc50e8333bf479f13 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:06 +0300 Subject: net/mlx4: Add RSS support for fragmented IP datagrams Enable RSS support for fragmented IP packets, when device supports it. Until now, fragmented IP packets were directed only to the default_qpn. Since IP fragments (datagram) have no upper protocols (L3 IP packets), hash is performed on 3-tuple - dst MAC, source IP and dest IP. The HW makes sure that this holds for the 1st fragment too, so all fragments go to the same QP. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ab7ebec943b8..41eaafdfb1cd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -174,6 +174,7 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_RSS_IP_FRAG = 1LL << 52, MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, -- cgit v1.2.3 From fccea6436a5bd16aed6f722efce13ec2c90427d7 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:08 +0300 Subject: net/mlx4: Make mlx4_is_eth visible inline funcion Currently implemented as static function in resource_tracker.c -- this change will allow other files in mlx4_core to use it as well. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 41eaafdfb1cd..0f7f13d6a03e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1002,6 +1002,11 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_SLAVE; } +static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_type[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.2.3 From 12a889c057504fbf307dd237aedb87263ef2848a Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:10 +0300 Subject: net/mlx4: New file for QoS related firmware commands Create two new files fw_qos.h and fw_qos.c in mlx4_core module. It gathers all relevant QoS firmware related commands etc, thus improving encapsulation of the mlx4_core module. For now it contains the QoS existing commands: mlx4_SET_PORT_SCHEDULER and mlx4_SET_PORT_PRIO2TC. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0f7f13d6a03e..b676d0c0111a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,8 +49,6 @@ #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 -#define MLX4_NUM_UP 8 -#define MLX4_NUM_TC 8 #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values * greater then this value when @@ -1311,9 +1309,6 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); -int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); -int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, - u8 *pg, u16 *ratelimit); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 7e95bb99a8f969d4534867452793e44cc4731ca9 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:11 +0300 Subject: net/mlx4: Add mlx4_ALLOCATE_VPP implementation Implements device ALLOCATE_VPP command, to be used for granular QoS configuration of VFs by the PF device. Defines and queries the amount of VPPs assigned to each port, and the amount of VPPs assigned to each priority of each port. Once the total VPPs are split between the priorities of a port, they may be assigned with a share of the BW or a rate limit. Split into two functions (get/set) whoch are supplied with mlx4_alloc_vpp_context and physical port number. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7299e9548906..e0f88a098fb8 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -68,6 +68,7 @@ enum { MLX4_CMD_UNMAP_ICM_AUX = 0xffb, MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, + MLX4_CMD_ALLOCATE_VPP = 0x80, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.2.3 From 1c29146d3821be984030b49cf9509a269edc3b8b Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:12 +0300 Subject: net/mlx4: Add mlx4_SET_VPORT_QOS implementation Add the SET_VPORT_QOS device command, which is ntended for virtual granular QoS configuration per VF in SRIOV mode. The SET_VPORT_QOS command sets and queries QoS parameters of a VPort. Each priority allowed for a VPort is assigned with a share of the BW, and a BW limitation. QoS parameters can be modified at any time, but must be initialized before any QP is associated with the VPort. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e0f88a098fb8..88326ed0033a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -69,6 +69,7 @@ enum { MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, MLX4_CMD_ALLOCATE_VPP = 0x80, + MLX4_CMD_SET_VPORT_QOS = 0x81, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.2.3 From d019fcb2244519d453694bfce868f3e717bfcebb Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:13 +0300 Subject: net/mlx4: Query device for QoS per VF support Checks in QUERY_DEV_CAP if the granular QoS per VF feature is supported by the device. Disabled for guests. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b676d0c0111a..c37208f7869f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, - MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, + MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, }; enum { -- cgit v1.2.3 From 08068cd5683f11e4505aa9c8cc6ed5942f8ad299 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:15 +0300 Subject: net/mlx4: Added qos_vport QP configuration in VST mode Granular QoS per VF feature introduce a new QP field, qos_vport. PF administrator can connect VF QPs to a certain QoS Vport, to inherit its proporties. Connecting QPs to the default QoS Vport (defined as 0) is always allowed, even when there are no allocated VPPs. At this point, only the default vport is connected to QPs. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 1023ebe035b7..6fed539e5456 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -209,7 +209,8 @@ struct mlx4_qp_context { __be16 sq_wqe_counter; u32 reserved3; __be16 rate_limit_params; - __be16 reserved4; + u8 reserved4; + u8 qos_vport; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; @@ -231,6 +232,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_QOS_VPP = 34, MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; @@ -432,7 +434,8 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 + MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 }; enum mlx4_update_qp_params_flags { @@ -441,6 +444,7 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; + u8 qos_vport; u32 flags; u16 rate_unit; u16 rate_val; -- cgit v1.2.3 From cda373f4849d5dd6fedceb4aeba35682a0e1a833 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:16 +0300 Subject: net/mlx4_en: Enable TX rate limit per VF Support granular QoS per VF, by implementing the ndo_set_vf_rate. Enforce a rate limit per VF when called, and enabled only for VFs in VST mode with user priority supported by the device. We don't enforce VFs to be in VST mode at the moment of configuration, but rather save the given rate limit and enforce it when the VF is moved to VST with user priority which is supported (currently 0). VST<->VGT or VST qos value state changes are disallowed when a rate limit is configured. Minimum BW share is not supported yet. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 88326ed0033a..a788839f54bb 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -296,6 +296,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, + int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); -- cgit v1.2.3 From 3742cc65512cd4897a63dce94104f9a6e74997a0 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:17 +0300 Subject: net/mlx4: Warn users of depracated QoS Firmware A new capability bit was introduced in the past to to differ devices using the QoS ETS feature. The old was deprecated since then. If driver sees device which set only the old capabilty, it will print warning to user suggesting to upgrade the FW. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c37208f7869f..15f46767342e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -207,6 +207,7 @@ enum { MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, + MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, }; enum { -- cgit v1.2.3 From a130b59057320192b4b00ed0ba4bc8a38f66f289 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:19 +0300 Subject: net/mlx4: Add SET_PORT opcode modifiers enumeration The calls to SET_PORT used hard-code numbers, when supplying command's opcode modifiers, fix that to use well defined constants. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index a788839f54bb..62c4d4def474 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -188,7 +188,13 @@ enum { }; enum { - /* set port opcode modifiers */ + /* Set port opcode modifiers */ + MLX4_SET_PORT_IB_OPCODE = 0x0, + MLX4_SET_PORT_ETH_OPCODE = 0x1, +}; + +enum { + /* Set port Ethernet input modifiers */ MLX4_SET_PORT_GENERAL = 0x0, MLX4_SET_PORT_RQP_CALC = 0x1, MLX4_SET_PORT_MAC_TABLE = 0x2, -- cgit v1.2.3 From 51af33cfed248dc8f36fa82df06b85e10038a01e Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:20 +0300 Subject: net/mlx4_en: Add interface identify support Add support for the interface ethtool identify feature. Make the physical port LED to blink with green and yellow colors. The device handles the LED blink by itself (synchrous use of set_phys_id), by returning 0 to ETHTOOL_ID_ACTIVE command. Signed-off-by: Eyal Grossman Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 62c4d4def474..f62e7cf227c6 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -191,6 +191,7 @@ enum { /* Set port opcode modifiers */ MLX4_SET_PORT_IB_OPCODE = 0x0, MLX4_SET_PORT_ETH_OPCODE = 0x1, + MLX4_SET_PORT_BEACON_OPCODE = 0x4, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 15f46767342e..b761be7c88c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,7 @@ enum { MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, + MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, }; enum { @@ -1311,6 +1312,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); +int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 78500b8c03236a18d454c9cc8a24cccca506b200 Mon Sep 17 00:00:00 2001 From: Muhammad Mahajna Date: Thu, 2 Apr 2015 16:31:22 +0300 Subject: net/mlx4_en: Add RX-ALL support Enabled when the device supports KEEP FCS and IGNORE FCS. When the flag is set, pass all received frames up the stack, even ones with invalid FCS, controlled by ethtool. Signed-off-by: Muhammad Mahajna Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b761be7c88c8..f9ce34bec45b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -209,6 +209,7 @@ enum { MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, + MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, }; enum { @@ -1313,6 +1314,8 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); +int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, + u8 ignore_fcs_value); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 64599cca51de08cef94bc13a0f98351e5bb01f41 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 2 Apr 2015 17:07:25 +0300 Subject: net/mlx5_core: Use coherent memory for command interface page Use coherent memory for the commands descriptor page. Take measures to make sure the page is aligned to MLX5_ADAPTER_PAGE_SIZE as required by the hardware. Reported-by: Yevgeny Kliteynik Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 166d9315fe4b..8d8ca6d9b03b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -232,6 +232,9 @@ struct mlx5_cmd_stats { }; struct mlx5_cmd { + void *cmd_alloc_buf; + dma_addr_t alloc_dma; + int alloc_size; void *cmd_buf; dma_addr_t dma; u16 cmdif_rev; -- cgit v1.2.3 From 302bdf68fc56a6330bc6b10ce435b4d466417537 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:29 +0300 Subject: net/mlx5_core: Fix Mellanox copyright note Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/cmd.h | 2 +- include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/doorbell.h | 2 +- include/linux/mlx5/driver.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/qp.h | 2 +- include/linux/mlx5/srq.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h index 2826a4b6071e..68cd08f02c2f 100644 --- a/include/linux/mlx5/cmd.h +++ b/include/linux/mlx5/cmd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index f6b17ac601bd..72ee0d732a26 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4e5bd813bb9a..abf65c790421 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h index 163a818411e7..afc78a3f4462 100644 --- a/include/linux/mlx5/doorbell.h +++ b/include/linux/mlx5/doorbell.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8d8ca6d9b03b..8fedd39a9a60 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5f48b8f592c5..cb3ad17edd1f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 61f7a342d1bf..310b5f7fd6ae 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index e1a363a33663..f43ed054a3e0 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU -- cgit v1.2.3 From 233d05d28ad942929b6b4fbc48aa8dd083c16484 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:32 +0300 Subject: net/mlx5_core: Move completion eqs from mlx5_ib to mlx5_core Preparation for ethernet driver. These functions will be used in drivers other than mlx5_ib. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8fedd39a9a60..f250f6580dad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -410,7 +410,7 @@ struct mlx5_core_srq { struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; - struct list_head *comp_eq_head; + struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; @@ -725,6 +725,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3 From ce0f75093282c5dca1e79ae3e3e893deaea86166 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:33 +0300 Subject: net/mlx5_core: Modify arm CQ in preparation for upcoming Ethernet driver Pass consumer index as a parameter to arm CQ Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/cq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 72ee0d732a26..2695ced222df 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -137,14 +137,15 @@ enum { static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, void __iomem *uar_page, - spinlock_t *doorbell_lock) + spinlock_t *doorbell_lock, + u32 cons_index) { __be32 doorbell[2]; u32 sn; u32 ci; sn = cq->arm_sn & 3; - ci = cq->cons_index & 0xffffff; + ci = cons_index & 0xffffff; *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); -- cgit v1.2.3 From 64613d9499c4887485d4350387919ea507330d90 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:34 +0300 Subject: net/mlx5_core: Extend struct mlx5_interface to support multiple protocols Preparation for ethernet driver. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f250f6580dad..9a90e7523dc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -781,14 +781,22 @@ enum { MAX_MR_CACHE_ENTRIES = 16, }; +enum { + MLX5_INTERFACE_PROTOCOL_IB = 0, + MLX5_INTERFACE_PROTOCOL_ETH = 1, +}; + struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); + void * (*get_dev)(void *context); + int protocol; struct list_head list; }; +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); -- cgit v1.2.3 From 590ce4bcbfb4e0462a720a4ad901e84416080bba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Dec 2014 18:30:08 -0600 Subject: mnt: Add MNT_UMOUNT flag In some instances it is necessary to know if the the unmounting process has begun on a mount. Add MNT_UMOUNT to make that reliably testable. This fix gets used in fixing locked mounts in MNT_DETACH Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index c2c561dc0114..564beeec5d83 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -61,6 +61,7 @@ struct mnt_namespace; #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 +#define MNT_UMOUNT 0x8000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3 From 9a806ddbb9a18c510e4acdcc828b9a87f5fd3aef Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:21 -0700 Subject: time: Add y2038 safe read_boot_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_boot_clock64() and replaces all the call sites of read_boot_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_boot_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_boot_clock() can be removed after all its architecture specific implementations have been converted to read_boot_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 5047b83483d6..18d27a3f72ca 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -269,6 +269,7 @@ static inline bool has_persistent_clock(void) extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); +extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); -- cgit v1.2.3 From 2ee966320028ac846654eba5344540eeb4dc228d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:22 -0700 Subject: time: Add y2038 safe read_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_persistent_clock64() and replaces all the call sites of read_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_persistent_clock() can be removed after all its architecture specific implementations have been converted to read_persistent_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 18d27a3f72ca..4c0f76f4616c 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -268,6 +268,7 @@ static inline bool has_persistent_clock(void) } extern void read_persistent_clock(struct timespec *ts); +extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); -- cgit v1.2.3 From 3c00a1fe8496ff29ab62764bb3f4ce4b48089004 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:23 -0700 Subject: time: Add y2038 safe update_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds update_persistent_clock64() and replaces all the call sites of update_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe update_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually y2038-unsafe update_persistent_clock() can be removed after all its architecture specific implementations have been converted to update_persistent_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 4c0f76f4616c..7a2369d5b3f4 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -272,6 +272,7 @@ extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); +extern int update_persistent_clock64(struct timespec64 now); #endif -- cgit v1.2.3 From 8e4ff1a81aa91d12856287c7103d0301ac91351a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:27 -0700 Subject: drivers/rtc: Provide y2038 safe rtc_class_ops.set_mmss() replacement Currently the rtc_class_op's set_mmss() function takes a 32-bit second value (on 32-bit systems), which is problematic for dates past y2038. This patch provides a safe version named set_mmss64() using y2038 safe time64_t. After this patch, set_mmss() is deprecated and all its users will be fixed to use set_mmss64(), it can be removed when having no users. Signed-off-by: Xunlei Pang [jstultz: Add whitespace fix for checkpatch] Signed-off-by: John Stultz Acked-by: Alessandro Zummo Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-8-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index dcad7ee0d746..8dcf6825fa88 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -77,6 +77,7 @@ struct rtc_class_ops { int (*read_alarm)(struct device *, struct rtc_wkalrm *); int (*set_alarm)(struct device *, struct rtc_wkalrm *); int (*proc)(struct device *, struct seq_file *); + int (*set_mmss64)(struct device *, time64_t secs); int (*set_mmss)(struct device *, unsigned long secs); int (*read_callback)(struct device *, int data); int (*alarm_irq_enable)(struct device *, unsigned int enabled); -- cgit v1.2.3 From 0fa88cb4b82b5cf7429bc1cef9db006ca035754e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:38 -0700 Subject: time, drivers/rtc: Don't bother with rtc_resume() for the nonstop clocksource If a system does not provide a persistent_clock(), the time will be updated on resume by rtc_resume(). With the addition of the non-stop clocksources for suspend timing, those systems set the time on resume in timekeeping_resume(), but may not provide a valid persistent_clock(). This results in the rtc_resume() logic thinking no one has set the time and it then will over-write the suspend time again, which is not necessary and only increases clock error. So, fix this for rtc_resume(). This patch also improves the name of persistent_clock_exist to make it more grammatical. Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-19-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/timekeeping.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7a2369d5b3f4..99176af216af 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -248,6 +248,9 @@ static inline void timekeeping_clocktai(struct timespec *ts) /* * RTC specific */ +extern bool timekeeping_rtc_skipsuspend(void); +extern bool timekeeping_rtc_skipresume(void); + extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* @@ -259,14 +262,8 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw, /* * Persistent clock related interfaces */ -extern bool persistent_clock_exist; extern int persistent_clock_is_local; -static inline bool has_persistent_clock(void) -{ - return persistent_clock_exist; -} - extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); -- cgit v1.2.3 From 592a438ff3fea61d303c5784c209b3f1fd3e16df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:10 +0200 Subject: clockevents: Provide explicit broadcast control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/8086559.ttsuS0n1Xr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/tick.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f9ff225d53c0..4bb236357b03 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -42,6 +42,31 @@ extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } #endif +enum tick_broadcast_mode { + TICK_BROADCAST_OFF, + TICK_BROADCAST_ON, + TICK_BROADCAST_FORCE, +}; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern void tick_broadcast_control(enum tick_broadcast_mode mode); +#else +static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } +#endif /* BROADCAST */ + +static inline void tick_broadcast_enable(void) +{ + tick_broadcast_control(TICK_BROADCAST_ON); +} +static inline void tick_broadcast_disable(void) +{ + tick_broadcast_control(TICK_BROADCAST_OFF); +} +static inline void tick_broadcast_force(void) +{ + tick_broadcast_control(TICK_BROADCAST_FORCE); +} + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); -- cgit v1.2.3 From 89feddbfe7023ccfb4a6d7f5e3f5161d91b28b18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:03:42 +0200 Subject: clockevents: Remove the broadcast control leftovers All users converted. Remove the notify leftovers. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2076318.76XJZ8QYP3@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index caeca76d7b32..438fafa75067 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,9 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ON, - CLOCK_EVT_NOTIFY_BROADCAST_OFF, - CLOCK_EVT_NOTIFY_BROADCAST_FORCE, CLOCK_EVT_NOTIFY_BROADCAST_ENTER, CLOCK_EVT_NOTIFY_BROADCAST_EXIT, CLOCK_EVT_NOTIFY_CPU_DYING, -- cgit v1.2.3 From 1fe5d5c3c9ba0c4ade18e3325cba0ffe35127941 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:05:15 +0200 Subject: clockevents: Provide explicit broadcast oneshot control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast oneshot control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast oneshot control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Alexandre Courbot Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Stephen Warren Cc: Thierry Reding Cc: Tony Lindgren Link: http://lkml.kernel.org/r/13000649.8qZuEDV0OA@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/tick.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 4bb236357b03..6119321fc3be 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -48,12 +48,23 @@ enum tick_broadcast_mode { TICK_BROADCAST_FORCE, }; +enum tick_broadcast_state { + TICK_BROADCAST_EXIT, + TICK_BROADCAST_ENTER, +}; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } +#endif + static inline void tick_broadcast_enable(void) { tick_broadcast_control(TICK_BROADCAST_ON); @@ -66,6 +77,14 @@ static inline void tick_broadcast_force(void) { tick_broadcast_control(TICK_BROADCAST_FORCE); } +static inline int tick_broadcast_enter(void) +{ + return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER); +} +static inline void tick_broadcast_exit(void) +{ + tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); +} #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); -- cgit v1.2.3 From ffa48c0d76803057ee89bf220305466d74256d7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 02:36:10 +0200 Subject: clockevents: Remove broadcast oneshot control leftovers Now that all users are converted over to explicit calls into the clockevents state machine, remove the notification chain leftovers. Original-from: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: John Stultz Link: http://lkml.kernel.org/r/14018863.NQUzkFuafr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 438fafa75067..f97f498a5d10 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,8 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; -- cgit v1.2.3 From 52c063d1adbc16c76e70fffa20727fcd4e9343b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:37:24 +0200 Subject: clockevents: Make tick handover explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the tick_handover call and invoke it explicitely from the hotplug code. Temporary solution will be cleaned up in later patches. Signed-off-by: Thomas Gleixner [ Rebase ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: John Stultz Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1658173.RkEEILFiQZ@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 1 - include/linux/tick.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f97f498a5d10..f4bde22f8a05 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,7 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/include/linux/tick.h b/include/linux/tick.h index 6119321fc3be..2c68fa3b9436 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -19,12 +19,14 @@ extern void tick_unfreeze(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); +extern void tick_handover_do_timer(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } +static inline void tick_handover_do_timer(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT -- cgit v1.2.3 From a49b116dcb1265f238f3169507424257b0519069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:38:05 +0200 Subject: clockevents: Cleanup dead cpu explicitely clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the cleanup function for a dead cpu and invoke it directly from the cpu down code. Make it conditional on CPU_HOTPLUG as well. Temporary change, will be refined in the future. Signed-off-by: Thomas Gleixner [ Rebased, added clockevents_notify() removal ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1735025.raBZdQHM3m@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 6 ------ include/linux/tick.h | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f4bde22f8a05..96c280b2c263 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,12 +8,6 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -/* Clock event notification values */ -enum clock_event_nofitiers { - CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DEAD, -}; - #ifdef CONFIG_GENERIC_CLOCKEVENTS # include diff --git a/include/linux/tick.h b/include/linux/tick.h index 2c68fa3b9436..f8492da57ad3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -20,6 +20,7 @@ extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); extern void tick_handover_do_timer(void); +extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } @@ -27,6 +28,7 @@ static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } static inline void tick_handover_do_timer(void) { } +static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT -- cgit v1.2.3 From d75e4af14e228bbe3f86e29bcecb8e6be98d4e04 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 31 Mar 2015 20:15:09 +0200 Subject: cpuidle: remove state_count field from struct cpuidle_device Thomas Schlichter reports the following issue on his Samsung NC20: "The C-states C1 and C2 to the OS when connected to AC, and additionally provides the C3 C-state when disconnected from AC. However, the number of C-states shown in sysfs is fixed to the number of C-states present at boot. If I boot with AC connected, I always only see the C-states up to C2 even if I disconnect AC. The reason is commit 130a5f692425 (ACPI / cpuidle: remove dev->state_count setting). It removes the update of dev->state_count, but sysfs uses exactly this variable to show the C-states. The fix is to use drv->state_count in sysfs. As this is currently the last user of dev->state_count, this variable can be completely removed." Remove dev->state_count as per the above. Reported-by: Thomas Schlichter Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Acked-by: Daniel Lezcano Cc: 3.14+ # 3.14+ [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 306178d7309f..9c5e89254796 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -77,7 +77,6 @@ struct cpuidle_device { unsigned int cpu; int last_residency; - int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; -- cgit v1.2.3 From cae80b305e1c3944746dd93e33e9b2ccd5a490c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Apr 2015 09:04:04 -0400 Subject: locks: change lm_get_owner and lm_put_owner prototypes The current prototypes for these operations are somewhat awkward as they deal with fl_owners but take struct file_lock arguments. In the future, we'll want to be able to take references without necessarily dealing with a struct file_lock. Change them to take fl_owner_t arguments instead and have the callers deal with assigning the values to the file_lock structs. Signed-off-by: Jeff Layton --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f4131e8ead74..e4111a29697e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -893,8 +893,8 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); - void (*lm_get_owner)(struct file_lock *, struct file_lock *); - void (*lm_put_owner)(struct file_lock *); + fl_owner_t (*lm_get_owner)(fl_owner_t); + void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); bool (*lm_break)(struct file_lock *); -- cgit v1.2.3 From aadc3780f31865edc84c587ab718a33a8eeeb09d Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 27 Mar 2015 09:10:10 -0700 Subject: hv: remove the per-channel workqueue It's not necessary any longer, since we can safely run the blocking message handlers in vmbus_connection.work_queue now. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 80e444bfc9dc..902c37aef67e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -653,8 +653,6 @@ struct vmbus_channel { struct hv_device *device_obj; - struct work_struct work; - enum vmbus_channel_state state; struct vmbus_channel_offer_channel offermsg; @@ -675,7 +673,6 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ spinlock_t inbound_lock; - struct workqueue_struct *controlwq; struct vmbus_close_msg close_msg; -- cgit v1.2.3 From e79d8429aac95a5cbe4c235795c7cd554c91f924 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 3 Apr 2015 22:17:17 +1030 Subject: netdevice: document NETDEV_TX_BUSY deprecation. This paraphrases DaveM (and steals some of his words) explaining why a device shouldn't return NETDEV_TX_BUSY, even though it looks so inviting to driver authors. See http://www.spinics.net/lists/netdev/msg322350.html Inspired-by: David Miller Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 846a1f5bc9db..a710d22b174f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,7 +795,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. - * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. + * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop + * the queue before that can happen; it's for obsolete devices and weird + * corner cases, but the stack really does a non-trivial amount + * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * -- cgit v1.2.3 From 2e7056c433216f406b90a003aa0ba42e19d3bdcf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 31 Mar 2015 14:19:10 -0700 Subject: jhash: Update jhash_[321]words functions to use correct initval Looking over the implementation for jhash2 and comparing it to jhash_3words I realized that the two hashes were in fact very different. Doing a bit of digging led me to "The new jhash implementation" in which lookup2 was supposed to have been replaced with lookup3. In reviewing the patch I noticed that jhash2 had originally initialized a and b to JHASH_GOLDENRATIO and c to initval, but after the patch a, b, and c were initialized to initval + (length << 2) + JHASH_INITVAL. However the changes in jhash_3words simply replaced the initialization of a and b with JHASH_INITVAL. This change corrects what I believe was an oversight so that a, b, and c in jhash_3words all have the same value added consisting of initval + (length << 2) + JHASH_INITVAL so that jhash2 and jhash_3words will now produce the same hash result given the same inputs. Fixes: 60d509c823cca ("The new jhash implementation") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/jhash.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 47cb09edec1a..348c6f47e4cc 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -145,11 +145,11 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) } -/* jhash_3words - hash exactly 3, 2 or 1 word(s) */ -static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { - a += JHASH_INITVAL; - b += JHASH_INITVAL; + a += initval; + b += initval; c += initval; __jhash_final(a, b, c); @@ -157,14 +157,19 @@ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) return c; } +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { - return jhash_3words(a, b, 0, initval); + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { - return jhash_3words(a, 0, 0, initval); + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */ -- cgit v1.2.3 From a3bebdce4135a44d09e96ba66c40797c8f9fa902 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Wed, 1 Apr 2015 20:30:31 +0300 Subject: add fixed_phy_update_state() - update state of fixed_phy Currently fixed_phy uses a callback to periodically poll the link state. This patch adds the fixed_phy_update_state() API. It solves the following problems: - On link state interrupt, MAC driver can't update status. Instead it needs to provide the callback to periodically query the HW about the link state. It is more efficient to update status after interrupt. - The callback needs to be unregistered before phy_disconnect(), or otherwise it will be called with net_dev==NULL. phy_disconnect() does not have enough info to unregister the callback automatically. - The callback needs to be registered before of_phy_connect() to avoid running with outdated state, but of_phy_connect() returns the phy_device pointer, which is needed to register the callback. Registering it before of_phy_connect() will therefore require a hack to get the pointer earlier. Overall, this addition makes the subsequent patch that implements SGMII link status for mvneta, much cleaner. CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 7e75bfe37cc7..fe5732d53eda 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -21,6 +21,9 @@ extern void fixed_phy_del(int phy_addr); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); +extern int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -43,6 +46,12 @@ static inline int fixed_phy_set_link_update(struct phy_device *phydev, { return -ENODEV; } +static inline int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 97badf873ab60e841243b66133ff9eff2a46ef29 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 23:23:37 +0200 Subject: device property: Make it possible to use secondary firmware nodes Add a secondary pointer to struct fwnode_handle so as to make it possible for a device to have two firmware nodes associated with it at the same time, for example, an ACPI node and a node with a set of properties provided by platform initialization code. In the future that will allow device property lookup to fall back from the primary firmware node to the secondary one if the given property is not present there to make it easier to provide defaults for device properties used by device drivers. Introduce two helper routines, set_primary_fwnode() and set_secondary_fwnode() allowing callers to add a primary/secondary firmware node to the given device in such a way that (1) If there's only one firmware node for that device, it will be pointed to by the device's firmware node pointer. (2) If both the primary and secondary firmware nodes are present, the primary one will be pointed to by the device's firmware node pointer, while the secondary one will be pointed to by the primary node's secondary pointer. (3) If one of these nodes is removed (by calling one of the new nelpers with NULL as the second argument), the other one will be preserved. Make ACPI use set_primary_fwnode() for attaching its firmware nodes to devices. Signed-off-by: Rafael J. Wysocki Tested-by: Heikki Krogerus Acked-by: Greg Kroah-Hartman --- include/linux/acpi.h | 4 ++-- include/linux/device.h | 3 +++ include/linux/fwnode.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ec488d03b518..dd12127f171c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -54,8 +54,8 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) } #define ACPI_COMPANION(dev) acpi_node((dev)->fwnode) -#define ACPI_COMPANION_SET(dev, adev) (dev)->fwnode = (adev) ? \ - acpi_fwnode_handle(adev) : NULL +#define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ + acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) static inline bool has_acpi_companion(struct device *dev) diff --git a/include/linux/device.h b/include/linux/device.h index badef20b876a..324d02add7b4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -940,6 +940,9 @@ extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); +extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); +extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); + /* * Root device objects for grouping under /sys/devices */ diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 17bb5f039509..fc30b84b532a 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -20,6 +20,7 @@ enum fwnode_type { struct fwnode_handle { enum fwnode_type type; + struct fwnode_handle *secondary; }; #endif -- cgit v1.2.3 From 16ba08d5c9ec44f89ec03c67ecf7a9c5e2d204fd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 16:05:11 +0200 Subject: device property: Introduce firmware node type for platform data Introduce data structures and code allowing "built-in" properties to be associated with devices in such a way that they will be used by the device_property_* API if no proper firmware node (neither DT nor ACPI) is present for the given device. Each property is to be represented by a property_entry structure. An array of property_entry structures (terminated with a null entry) can be pointed to by the properties field of struct property_set that can be added as a firmware node to a struct device using device_add_property_set(). That will cause the device_property_* API to use that property_set as the source of properties if the given device does not have a DT node or an ACPI companion device object associated with it. Signed-off-by: Rafael J. Wysocki Tested-by: Heikki Krogerus Acked-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 1 + include/linux/property.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index fc30b84b532a..0408545bce42 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -16,6 +16,7 @@ enum fwnode_type { FWNODE_INVALID = 0, FWNODE_OF, FWNODE_ACPI, + FWNODE_PDATA, }; struct fwnode_handle { diff --git a/include/linux/property.h b/include/linux/property.h index 31dfd3db35d6..de8bdf417a35 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -131,4 +131,37 @@ static inline int fwnode_property_read_u64(struct fwnode_handle *fwnode, return fwnode_property_read_u64_array(fwnode, propname, val, 1); } +/** + * struct property_entry - "Built-in" device property representation. + * @name: Name of the property. + * @type: Type of the property. + * @nval: Number of items of type @type making up the value. + * @value: Value of the property (an array of @nval items of type @type). + */ +struct property_entry { + const char *name; + enum dev_prop_type type; + size_t nval; + union { + void *raw_data; + u8 *u8_data; + u16 *u16_data; + u32 *u32_data; + u64 *u64_data; + const char **str; + } value; +}; + +/** + * struct property_set - Collection of "built-in" device properties. + * @fwnode: Handle to be pointed to by the fwnode field of struct device. + * @properties: Array of properties terminated with a null entry. + */ +struct property_set { + struct fwnode_handle fwnode; + struct property_entry *properties; +}; + +void device_add_property_set(struct device *dev, struct property_set *pset); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From cfdfab314647b1755afedc33ab66f3f247e161ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 16:23:58 -0400 Subject: netfilter: Create and use nf_hook_state. Instead of passing a large number of arguments down into the nf_hook() entry points, create a structure which carries this state down through the hook processing layers. This makes is so that if we want to change the types or signatures of any of these pieces of state, there are less places that need to be changed. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2517ece98820..aee7ef1e23ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -44,6 +44,16 @@ int netfilter_init(void); struct sk_buff; struct nf_hook_ops; + +struct nf_hook_state { + unsigned int hook; + int thresh; + u_int8_t pf; + struct net_device *in; + struct net_device *out; + int (*okfn)(struct sk_buff *); +}; + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -118,9 +128,7 @@ static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) } #endif -int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook @@ -135,8 +143,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { - if (nf_hooks_active(pf, hook)) - return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + if (nf_hooks_active(pf, hook)) { + struct nf_hook_state state = { + .hook = hook, + .thresh = thresh, + .pf = pf, + .in = indev, + .out = outdev, + .okfn = okfn + }; + + return nf_hook_slow(skb, &state); + } return 1; } -- cgit v1.2.3 From 238e54c9cb9385a1ba99e92801f3615a2fb398b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:32:56 -0400 Subject: netfilter: Make nf_hookfn use nf_hook_state. Pass the nf_hook_state all the way down into the hook functions themselves. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index aee7ef1e23ed..c480c43ad8f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -56,9 +56,7 @@ struct nf_hook_state { typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)); + const struct nf_hook_state *state); struct nf_hook_ops { struct list_head list; -- cgit v1.2.3 From 1c491ba2592f621f21a693d43fab06302527fc0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:56:08 -0400 Subject: netfilter: Pass nf_hook_state through ipt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 901e84db847d..4073510da485 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -65,8 +65,7 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 8f8a37152df49d541c43f010543f2b0176fcfb8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:09:51 -0400 Subject: netfilter: Pass nf_hook_state through ip6t_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 610208b18c05..b40d2b635778 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -31,8 +31,7 @@ extern struct xt_table *ip6t_register_table(struct net *net, extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); /* Check for an extension */ -- cgit v1.2.3 From b85c3dc9bd5347ad9540ec8d103b7c049c48b7cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:18:46 -0400 Subject: netfilter: Pass nf_hook_state through arpt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index cfb7191e6efa..c22a7fb8d0df 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,8 +54,7 @@ extern struct xt_table *arpt_register_table(struct net *net, extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 709c2c70c8ea73f488971f1de3c39fbdac995951 Mon Sep 17 00:00:00 2001 From: Beomho Seo Date: Fri, 3 Apr 2015 17:26:08 +0900 Subject: power: max17042_battery: Use reg type instead of chip type Currently, max17042 battery driver choose register map by MAX17042_DevName register. But it is return IC specific firmware version. So other maxim chip hard to use this drvier. This patch choose chip type from driver_data. Signed-off-by: Beomho Seo Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/max17042_battery.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 89dd84f47c6e..cf112b4075c8 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -126,7 +126,14 @@ enum max17047_register { MAX17047_QRTbl30 = 0x42, }; -enum max170xx_chip_type {MAX17042, MAX17047}; +enum max170xx_chip_type { + MAXIM_DEVICE_TYPE_UNKNOWN = 0, + MAXIM_DEVICE_TYPE_MAX17042, + MAXIM_DEVICE_TYPE_MAX17047, + MAXIM_DEVICE_TYPE_MAX17050, + + MAXIM_DEVICE_TYPE_NUM +}; /* * used for setting a register to a desired value -- cgit v1.2.3 From b2edffdd912b4205899a8efa0974dfbbc3216109 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2015 17:48:54 -0400 Subject: fix mremap() vs. ioctx_kill() race teach ->mremap() method to return an error and have it fail for aio mappings in process of being killed Note that in case of ->mremap() failure we need to undo move_page_tables() we'd already done; we could call ->mremap() first, but then the failure of move_page_tables() would require undoing whatever _successful_ ->mremap() has done, which would be a lot more headache in general. Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f4131e8ead74..52cc4492cb3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1549,7 +1549,7 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); - void (*mremap)(struct file *, struct vm_area_struct *); + int (*mremap)(struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); -- cgit v1.2.3 From 0ea18b40cdd382a8a338d02e06c4646c35bd753b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Apr 2015 11:14:38 -0600 Subject: vgaarb: Stub vga_set_legacy_decoding() vga_set_legacy_decoding() is defined in drivers/gpu/vga/vgaarb.c, which is only compiled with CONFIG_VGA_ARB. A caller would therefore get an undefined symbol if the VGA arbiter is not enabled. Signed-off-by: Alex Williamson Acked-by: Dave Airlie --- include/linux/vgaarb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index c37bd4d06739..8c3b412d84df 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -65,8 +65,13 @@ struct pci_dev; * out of the arbitration process (and can be safe to take * interrupts at any time. */ +#if defined(CONFIG_VGA_ARB) extern void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); +#else +static inline void vga_set_legacy_decoding(struct pci_dev *pdev, + unsigned int decodes) { }; +#endif /** * vga_get - acquire & locks VGA resources -- cgit v1.2.3 From 62f0342de1f012f3e90607d39e20fce811391169 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 13 Feb 2015 14:34:25 -0600 Subject: usb: define a generic USB_RESUME_TIMEOUT macro Every USB Host controller should use this new macro to define for how long resume signalling should be driven on the bus. Currently, almost every single USB controller is using a 20ms timeout for resume signalling. That's problematic for two reasons: a) sometimes that 20ms timer expires a little before 20ms, which makes us fail certification b) some (many) devices actually need more than 20ms resume signalling. Sure, in case of (b) we can state that the device is against the USB spec, but the fact is that we have no control over which device the certification lab will use. We also have no control over which host they will use. Most likely they'll be using a Windows PC which, again, we have no control over how that USB stack is written and how long resume signalling they are using. At the end of the day, we must make sure Linux passes electrical compliance when working as Host or as Device and currently we don't pass compliance as host because we're driving resume signallig for exactly 20ms and that confuses certification test setup resulting in Certification failure. Cc: # v3.10+ Acked-by: Greg Kroah-Hartman Acked-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7ee1b5c3b4cb..447fe29b55b4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -205,6 +205,32 @@ void usb_put_intf(struct usb_interface *intf); #define USB_MAXINTERFACES 32 #define USB_MAXIADS (USB_MAXINTERFACES/2) +/* + * USB Resume Timer: Every Host controller driver should drive the resume + * signalling on the bus for the amount of time defined by this macro. + * + * That way we will have a 'stable' behavior among all HCDs supported by Linux. + * + * Note that the USB Specification states we should drive resume for *at least* + * 20 ms, but it doesn't give an upper bound. This creates two possible + * situations which we want to avoid: + * + * (a) sometimes an msleep(20) might expire slightly before 20 ms, which causes + * us to fail USB Electrical Tests, thus failing Certification + * + * (b) Some (many) devices actually need more than 20 ms of resume signalling, + * and while we can argue that's against the USB Specification, we don't have + * control over which devices a certification laboratory will be using for + * certification. If CertLab uses a device which was tested against Windows and + * that happens to have relaxed resume signalling rules, we might fall into + * situations where we fail interoperability and electrical tests. + * + * In order to avoid both conditions, we're using a 40 ms resume timeout, which + * should cope with both LPJ calibration errors and devices not following every + * detail of the USB Specification. + */ +#define USB_RESUME_TIMEOUT 40 /* ms */ + /** * struct usb_interface_cache - long-term representation of a device interface * @num_altsetting: number of altsettings defined. -- cgit v1.2.3 From ff61eb422024a4614f898c1c73625e222b219a5d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Apr 2015 20:39:19 +0200 Subject: spi: Make master->handle_err() callback optional to avoid crashes If a driver doesn't implement the master->handle_err() callback and an SPI transfer fails, the kernel will crash with a NULL pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0003000 [00000000] *pgd=80000040004003, *pmd=00000000 Internal error: Oops: 80000206 [#1] SMP ARM Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.0.0-rc7-koelsch-05861-g1fc9fdd4add4f783 #1046 Hardware name: Generic R8A7791 (Flattened Device Tree) task: eec359c0 ti: eec54000 task.ti: eec54000 PC is at 0x0 LR is at spi_transfer_one_message+0x1cc/0x1f0 Make the master->handle_err() callback optional to avoid the crash. Also fix a spelling mistake in the callback documentation while we're at it. Fixes: b716c4ffc6a2b0bf ("spi: introduce master->handle_err() callback") Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4eaac3a5227b..5685af84a64d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -294,7 +294,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * transfer_one_message are mutually exclusive; when both * are set, the generic subsystem does not call your * transfer_one callback. - * @handle_err: the subsystem calls the driver to handle and error that occurs + * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @unprepare_message: undo any work done by prepare_message(). * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS -- cgit v1.2.3 From 107a9f4dc9211c1f91703d1739d7fd22ac58b332 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:18:54 -0400 Subject: netfilter: Add nf_hook_state initializer function. This way we can consolidate where we setup new nf_hook_state objects, to make sure the entire thing is initialized. The only other place an nf_hook_object is instantiated is nf_queue, wherein a structure copy is used. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c480c43ad8f7..b8c88f3c85ff 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,6 +54,21 @@ struct nf_hook_state { int (*okfn)(struct sk_buff *); }; +static inline void nf_hook_state_init(struct nf_hook_state *p, + unsigned int hook, + int thresh, u_int8_t pf, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + p->hook = hook; + p->thresh = thresh; + p->pf = pf; + p->in = indev; + p->out = outdev; + p->okfn = okfn; +} + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state); @@ -142,15 +157,10 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sk_buff *), int thresh) { if (nf_hooks_active(pf, hook)) { - struct nf_hook_state state = { - .hook = hook, - .thresh = thresh, - .pf = pf, - .in = indev, - .out = outdev, - .okfn = okfn - }; + struct nf_hook_state state; + nf_hook_state_init(&state, hook, thresh, pf, + indev, outdev, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 1c984f8a5df085bcf35364a8a870bd4db4da4ed3 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:00 -0400 Subject: netfilter: Add socket pointer to nf_hook_state. It is currently always set to NULL, but nf_queue is adjusted to be prepared for it being set to a real socket by taking and releasing a reference to that socket when necessary. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b8c88f3c85ff..f8f58fab2402 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -45,12 +45,15 @@ struct sk_buff; struct nf_hook_ops; +struct sock; + struct nf_hook_state { unsigned int hook; int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; + struct sock *sk; int (*okfn)(struct sk_buff *); }; @@ -59,6 +62,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int thresh, u_int8_t pf, struct net_device *indev, struct net_device *outdev, + struct sock *sk, int (*okfn)(struct sk_buff *)) { p->hook = hook; @@ -66,6 +70,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->pf = pf; p->in = indev; p->out = outdev; + p->sk = sk; p->okfn = okfn; } @@ -160,7 +165,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, okfn); + indev, outdev, NULL, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 7026b1ddb6b8d4e6ee33dc2bd06c0ca8746fa7ab Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:04 -0400 Subject: netfilter: Pass socket pointer down through okfn(). On the output paths in particular, we have to sometimes deal with two socket contexts. First, and usually skb->sk, is the local socket that generated the frame. And second, is potentially the socket used to control a tunneling socket, such as one the encapsulates using UDP. We do not want to disassociate skb->sk when encapsulating in order to fix this, because that would break socket memory accounting. The most extreme case where this can cause huge problems is an AF_PACKET socket transmitting over a vxlan device. We hit code paths doing checks that assume they are dealing with an ipv4 socket, but are actually operating upon the AF_PACKET one. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++-- include/linux/netfilter.h | 62 ++++++++++++++++++++++------------------ include/linux/netfilter_bridge.h | 2 +- 3 files changed, 46 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41bf58a2b936..45823db2efb0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2165,8 +2165,12 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sk_buff *newskb); -int dev_queue_xmit(struct sk_buff *skb); +int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return dev_queue_xmit_sk(skb->sk, skb); +} int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); @@ -2927,7 +2931,11 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); +static inline int netif_receive_skb(struct sk_buff *skb) +{ + return netif_receive_skb_sk(skb->sk, skb); +} gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f8f58fab2402..63560d0a8dfe 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; - int (*okfn)(struct sk_buff *); + int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -63,7 +63,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; p->thresh = thresh; @@ -156,26 +156,29 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *, struct sk_buff *), + int thresh) { if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, NULL, okfn); + indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -196,35 +199,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), int thresh) +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, + struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); + int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(skb); + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), bool cond) +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(skb); + ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -324,19 +328,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) -#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) +#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) { - return okfn(skb); + return okfn(sk, skb); } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..5fc0a0fe244b 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -30,7 +30,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { -- cgit v1.2.3 From 388069d30240e9524ad69ce6cc692fff2a5a7829 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 7 Apr 2015 12:10:16 +0200 Subject: netdevice.h: remove iflink description Also move 'group' description to match the order of the net_device structure. Fixes: 7a66bbc96ce9 ("net: remove iflink field from struct net_device") Reported-by: Fengguang Wu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45823db2efb0..bf6d9df34d7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1328,7 +1328,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @iflink: unique device identifier + * @group: The group, that the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1488,7 +1488,6 @@ enum netdev_priv_flags { * * @qdisc_tx_busylock: XXX: need comments on this one * - * @group: The group, that the device belongs to * @pm_qos_req: Power Management QoS object * * FIXME: cleanup struct net_device such that network protocol info -- cgit v1.2.3 From 7f9b838b71eb78a27de27a12ca5de8542fac3115 Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Mon, 6 Apr 2015 14:37:26 -0700 Subject: tcp: RFC7413 option support for Fast Open server Fast Open has been using the experimental option with a magic number (RFC6994) to request and grant Fast Open cookies. This patch enables the server to support the official IANA option 34 in RFC7413 in addition. The change has passed all existing Fast Open tests with both old and new options at Google. Signed-off-by: Daniel Lee Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f869ae8afbaf..a48d00318683 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -58,6 +58,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) struct tcp_fastopen_cookie { s8 len; u8 val[TCP_FASTOPEN_COOKIE_MAX]; + bool exp; /* In RFC6994 experimental option format */ }; /* This defines a selective acknowledgement block. */ -- cgit v1.2.3 From 2646c831c00c5d22aa72b79d24069c1b412cda7c Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Mon, 6 Apr 2015 14:37:27 -0700 Subject: tcp: RFC7413 option support for Fast Open client Fast Open has been using an experimental option with a magic number (RFC6994). This patch makes the client by default use the RFC7413 option (34) to get and send Fast Open cookies. This patch makes the client solicit cookies from a given server first with the RFC7413 option. If that fails to elicit a cookie, then it tries the RFC6994 experimental option. If that also fails, it uses the RFC7413 option on all subsequent connect attempts. If the server returns a Fast Open cookie then the client caches the form of the option that successfully elicited a cookie, and uses that form on later connects when it presents that cookie. The idea is to gradually obsolete the use of experimental options as the servers and clients upgrade, while keeping the interoperability meanwhile. Signed-off-by: Daniel Lee Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a48d00318683..0caa3a2d4106 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -189,6 +189,7 @@ struct tcp_sock { u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ + syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ -- cgit v1.2.3 From a368ab67aa55615a03b2c9c00fb965bee3ebeaa4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 7 Apr 2015 14:26:41 -0700 Subject: mm: move zone lock to a different cache line than order-0 free page lists Huang Ying reported the following problem due to commit 3484b2de9499 ("mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines") from the Intel performance tests 24b7e5819ad5cbef 3484b2de9499df23c4604a513b ---------------- -------------------------- %stddev %change %stddev \ | \ 152288 \261 0% -46.2% 81911 \261 0% aim7.jobs-per-min 237 \261 0% +85.6% 440 \261 0% aim7.time.elapsed_time 237 \261 0% +85.6% 440 \261 0% aim7.time.elapsed_time.max 25026 \261 0% +70.7% 42712 \261 0% aim7.time.system_time 2186645 \261 5% +32.0% 2885949 \261 4% aim7.time.voluntary_context_switches 4576561 \261 1% +24.9% 5715773 \261 0% aim7.time.involuntary_context_switches The problem is specific to very large machines under stress. It was not reproducible with the machines I had used to justify the original patch because large numbers of CPUs are required. When pressure is high enough, the cache line is bouncing between CPUs trying to acquire the lock and the holder of the lock adjusting free lists. The intention was that the acquirer of the lock would automatically have the cache line holding the free lists but according to Huang, this is not a universal win. One possibility is to move the zone lock to its own cache line but it increases the size of the zone. This patch moves the lock to the other end of the free lists where they do not contend under high pressure. It does mean the page allocator paths now require more cache lines but Huang reports that it restores performance to previous levels on large machines %stddev %change %stddev \ | \ 84568 \261 1% +94.3% 164280 \261 1% aim7.jobs-per-min 2881944 \261 2% -35.1% 1870386 \261 8% aim7.time.voluntary_context_switches 681 \261 1% -3.4% 658 \261 0% aim7.time.user_time 5538139 \261 0% -12.1% 4867884 \261 0% aim7.time.involuntary_context_switches 44174 \261 1% -46.0% 23848 \261 1% aim7.time.system_time 426 \261 1% -48.4% 219 \261 1% aim7.time.elapsed_time 426 \261 1% -48.4% 219 \261 1% aim7.time.elapsed_time.max 468 \261 1% -43.1% 266 \261 2% uptime.boot Signed-off-by: Mel Gorman Reported-by: Huang Ying Tested-by: Huang Ying Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f279d9c158cd..2782df47101e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -474,16 +474,15 @@ struct zone { unsigned long wait_table_bits; ZONE_PADDING(_pad1_) - - /* Write-intensive fields used from the page allocator */ - spinlock_t lock; - /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; /* zone flags, see below */ unsigned long flags; + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + ZONE_PADDING(_pad2_) /* Write-intensive fields used by page reclaim */ -- cgit v1.2.3 From ce66b032ad7b838bf376e3b1bb4d8bce1a69ee5c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2015 14:26:44 -0700 Subject: include/linux/dmapool.h: declare struct device dmapool uses struct device in function arguments but relies on an implicit inclusion to declare struct device causing warnings in some configurations: include/linux/dmapool.h:31:7: warning: 'struct device' declared inside parameter list Fix this by adding a struct device declaration to the file. Signed-off-by: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dmapool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 022e34fcbd1b..52456aa566a0 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -14,6 +14,8 @@ #include #include +struct device; + struct dma_pool *dma_pool_create(const char *name, struct device *dev, size_t size, size_t align, size_t allocation); -- cgit v1.2.3 From a67a95134ffddd0ca4527c77e86375c3deb2938f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Apr 2015 16:57:19 -0600 Subject: NVMe: Meta data handling through submit io ioctl This adds support for the extended metadata formats through the submit IO ioctl, and simplifies the rest when using a separate metadata format. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0adad4a5419b..8dbd05e70f09 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -117,8 +117,9 @@ struct nvme_ns { unsigned ns_id; int lba_shift; - int ms; - int pi_type; + u16 ms; + bool ext; + u8 pi_type; u64 mode_select_num_blocks; u32 mode_select_block_len; }; -- cgit v1.2.3 From 58d269d8cccc53643f1a0900cfc0940e85ec9691 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 2 Apr 2015 03:10:36 +0300 Subject: KVM: x86: BSP in MSR_IA32_APICBASE is writable After reset, the CPU can change the BSP, which will be used upon INIT. Reset should return the BSP which QEMU asked for, and therefore handled accordingly. To quote: "If the MP protocol has completed and a BSP is chosen, subsequent INITs (either to a specific processor or system wide) do not cause the MP protocol to be repeated." [Intel SDM 8.4.2: MP Initialization Protocol Requirements and Restrictions] Signed-off-by: Nadav Amit Message-Id: <1427933438-12782-3-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 27bd53b69080..82af5d0b996e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -982,11 +982,16 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ #ifdef CONFIG_KVM_APIC_ARCHITECTURE -static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } +static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); #else -- cgit v1.2.3 From f5c5179b9a8ab8e3255f78e233d94ea54f23f832 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 31 Mar 2015 12:41:55 +0200 Subject: mmc: core: Convert the error field in struct mmc_command|data into an int Everybody expects the error field in the struct mmc_command|data to be and int but it's actually an unsigned int. Let's convert it into an int to meet the expectations. Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 160448f920ac..de722d4e9d61 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -79,7 +79,7 @@ struct mmc_command { #define mmc_cmd_type(cmd) ((cmd)->flags & MMC_CMD_MASK) unsigned int retries; /* max number of retries */ - unsigned int error; /* command error */ + int error; /* command error */ /* * Standard errno values are used for errors, but some have specific @@ -108,7 +108,7 @@ struct mmc_data { unsigned int timeout_clks; /* data timeout (in clocks) */ unsigned int blksz; /* data block size */ unsigned int blocks; /* number of blocks */ - unsigned int error; /* data error */ + int error; /* data error */ unsigned int flags; #define MMC_DATA_WRITE (1 << 8) -- cgit v1.2.3 From acd388fd3af350ab24c6ab6f19b83fc4a4f3aa60 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 31 Mar 2015 14:37:12 -0400 Subject: tracing: Give system name a pointer Normally the compiler will use the same pointer for a string throughout the file. But there's no guarantee of that happening. Later changes will require that all events have the same pointer to the system string. Name the system string and have all events point to it. Testing this, it did not increases the size of the text, except for the notes section, which should not harm the real size any. Link: http://lkml.kernel.org/r/20150403013802.220157513@goodmis.org Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c674ee8f7fca..62b8fac7ded5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -202,7 +202,7 @@ enum trace_reg { struct ftrace_event_call; struct ftrace_event_class { - char *system; + const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; -- cgit v1.2.3 From 0c564a538aa934ad15b2145aaf8b64f3feb0be63 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 24 Mar 2015 17:58:09 -0400 Subject: tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values Several tracepoints use the helper functions __print_symbolic() or __print_flags() and pass in enums that do the mapping between the binary data stored and the value to print. This works well for reading the ASCII trace files, but when the data is read via userspace tools such as perf and trace-cmd, the conversion of the binary value to a human string format is lost if an enum is used, as userspace does not have access to what the ENUM is. For example, the tracepoint trace_tlb_flush() has: __print_symbolic(REC->reason, { TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" }, { TLB_REMOTE_SHOOTDOWN, "remote shootdown" }, { TLB_LOCAL_SHOOTDOWN, "local shootdown" }, { TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" }) Which maps the enum values to the strings they represent. But perf and trace-cmd do no know what value TLB_LOCAL_MM_SHOOTDOWN is, and would not be able to map it. With TRACE_DEFINE_ENUM(), developers can place these in the event header files and ftrace will convert the enums to their values: By adding: TRACE_DEFINE_ENUM(TLB_FLUSH_ON_TASK_SWITCH); TRACE_DEFINE_ENUM(TLB_REMOTE_SHOOTDOWN); TRACE_DEFINE_ENUM(TLB_LOCAL_SHOOTDOWN); TRACE_DEFINE_ENUM(TLB_LOCAL_MM_SHOOTDOWN); $ cat /sys/kernel/debug/tracing/events/tlb/tlb_flush/format [...] __print_symbolic(REC->reason, { 0, "flush on task switch" }, { 1, "remote shootdown" }, { 2, "local shootdown" }, { 3, "local mm shootdown" }) The above is what userspace expects to see, and tools do not need to be modified to parse them. Link: http://lkml.kernel.org/r/20150403013802.220157513@goodmis.org Cc: Guilherme Cox Cc: Tony Luck Cc: Xie XiuQi Acked-by: Namhyung Kim Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- include/linux/tracepoint.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 62b8fac7ded5..112cf49d9576 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -285,7 +285,7 @@ struct ftrace_event_call { struct tracepoint *tp; }; struct trace_event event; - const char *print_fmt; + char *print_fmt; struct event_filter *filter; void *mod; void *data; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c72851328ca9..a5f7f3ecafa3 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -36,6 +36,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct trace_enum_map { + const char *system; + const char *enum_string; + unsigned long enum_value; +}; + extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -87,6 +93,8 @@ extern void syscall_unregfunc(void); #define PARAMS(args...) args +#define TRACE_DEFINE_ENUM(x) + #endif /* _LINUX_TRACEPOINT_H */ /* -- cgit v1.2.3 From 3673b8e4ce7237160fa31ee8d7e94a4d5a9976a1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Mar 2015 15:44:21 -0400 Subject: tracing: Allow for modules to convert their enums to values Update the infrastructure such that modules that declare TRACE_DEFINE_ENUM() will have those enums converted into their values in the tracepoint print fmt strings. Link: http://lkml.kernel.org/r/87vbhjp74q.fsf@rustcorp.com.au Acked-by: Rusty Russell Reviewed-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/module.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 42999fe2dbd0..53dc41dd5c62 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -338,6 +338,8 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call **trace_events; unsigned int num_trace_events; + struct trace_enum_map **trace_enums; + unsigned int num_trace_enums; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; -- cgit v1.2.3 From e70deecbf8e1562cac0b19f23848919e2f5d65aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:40 +0200 Subject: netfilter: bridge: don't use nf_bridge_info data to store mac header br_netfilter maintains an extra state, nf_bridge_info, which is attached to skb via skb->nf_bridge pointer. Amongst other things we use skb->nf_bridge->data to store the original mac header for every processed skb. This is required for ip refragmentation when using conntrack on top of bridge, because ip_fragment doesn't copy it from original skb. However there is no need anymore to do this unconditionally. Move this to the one place where its needed -- when br_netfilter calls ip_fragment(). Also switch to percpu storage for this so we can handle fragmenting without accessing nf_bridge meta data. Only user left is neigh resolution when DNAT is detected, to hold the original source mac address (neigh resolution builds new mac header using bridge mac), so rename ->data and reduce its size to whats needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 36f3f43c0117..f66a089afc41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -169,7 +169,7 @@ struct nf_bridge_info { unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; - unsigned long data[32 / sizeof(unsigned long)]; + char neigh_header[8]; }; #endif -- cgit v1.2.3 From c737b7c4510026c200e14de51eb0006adea0fb2f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:41 +0200 Subject: netfilter: bridge: add helpers for fetching physin/outdev right now we store this in the nf_bridge_info struct, accessible via skb->nf_bridge. This patch prepares removal of this pointer from skb: Instead of using skb->nf_bridge->x, we use helpers to obtain the in/out device (or ifindexes). Followup patches to netfilter will then allow nf_bridge_info to be obtained by a call into the br_netfilter core, rather than keeping a pointer to it in sk_buff. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..e1d96bc2767c 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -2,7 +2,7 @@ #define __LINUX_BRIDGE_NETFILTER_H #include - +#include enum nf_br_hook_priorities { NF_BR_PRI_FIRST = INT_MIN, @@ -40,6 +40,27 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) skb_dst_drop(skb); } +static inline int nf_bridge_get_physinif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; +} + +static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; +} + +static inline struct net_device * +nf_bridge_get_physindev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev : NULL; +} + +static inline struct net_device * +nf_bridge_get_physoutdev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; +} #else #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From 3eaf402502e49ad9c58c73e8599c7c4f345d62da Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:44 +0200 Subject: netfilter: bridge: start splitting mask into public/private chunks ->mask is a bit info field that mixes various use cases. In particular, we have flags that are mutually exlusive, and flags that are only used within br_netfilter while others need to be exposed to other parts of the kernel. Remove BRNF_8021Q/PPPoE flags. They're mutually exclusive and only needed within br_netfilter context. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 4 +--- include/linux/skbuff.h | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index e1d96bc2767c..d47a32dffa15 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,12 +20,10 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -#define BRNF_8021Q 0x10 -#define BRNF_PPPoE 0x20 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f66a089afc41..6f75fb5c6ed7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -166,6 +166,11 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; + enum { + BRNF_PROTO_UNCHANGED, + BRNF_PROTO_8021Q, + BRNF_PROTO_PPPOE + } orig_proto; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; -- cgit v1.2.3 From a1e67951e6c0b11bb11c256f8e1c45ed51fcd760 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:45 +0200 Subject: netfilter: bridge: make BRNF_PKT_TYPE flag a bool nf_bridge_info->mask is used for several things, for example to remember if skb->pkt_type was set to OTHER_HOST. For a bridge, OTHER_HOST is expected case. For ip forward its a non-starter though -- routing expects PACKET_HOST. Bridge netfilter thus changes OTHER_HOST to PACKET_HOST before hook invocation and then un-does it after hook traversal. This information is irrelevant outside of br_netfilter. After this change, ->mask now only contains flags that need to be known outside of br_netfilter in fast-path. Future patch changes mask into a 2bit state field in sk_buff, so that we can remove skb->nf_bridge pointer for good and consider all remaining places that access nf_bridge info content a not-so fastpath. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - include/linux/skbuff.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index d47a32dffa15..8912e8c355fd 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,7 +17,6 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -#define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f75fb5c6ed7..0991259643d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,6 +171,7 @@ struct nf_bridge_info { BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto; + bool pkt_otherhost; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; -- cgit v1.2.3 From bba0bdd7ad4713d82338bcd9b72d57e9335a664b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Mar 2015 10:31:47 +0100 Subject: Defer processing of REQ_PREEMPT requests for blocked devices SCSI transport drivers and SCSI LLDs block a SCSI device if the transport layer is not operational. This means that in this state no requests should be processed, even if the REQ_PREEMPT flag has been set. This patch avoids that a rescan shortly after a cable pull sporadically triggers the following kernel oops: BUG: unable to handle kernel paging request at ffffc9001a6bc084 IP: [] mlx4_ib_post_send+0xd2/0xb30 [mlx4_ib] Process rescan-scsi-bus (pid: 9241, threadinfo ffff88053484a000, task ffff880534aae100) Call Trace: [] srp_post_send+0x65/0x70 [ib_srp] [] srp_queuecommand+0x1cf/0x3e0 [ib_srp] [] scsi_dispatch_cmd+0x101/0x280 [scsi_mod] [] scsi_request_fn+0x411/0x4d0 [scsi_mod] [] __blk_run_queue+0x27/0x30 [] blk_execute_rq_nowait+0x82/0x110 [] blk_execute_rq+0x62/0xf0 [] scsi_execute+0xe8/0x190 [scsi_mod] [] scsi_execute_req+0xa3/0x130 [scsi_mod] [] scsi_probe_lun+0x17a/0x450 [scsi_mod] [] scsi_probe_and_add_lun+0x156/0x480 [scsi_mod] [] __scsi_scan_target+0xdf/0x1f0 [scsi_mod] [] scsi_scan_host_selected+0x183/0x1c0 [scsi_mod] [] scsi_scan+0xdb/0xe0 [scsi_mod] [] store_scan+0x13/0x20 [scsi_mod] [] sysfs_write_file+0xcb/0x160 [] vfs_write+0xce/0x140 [] sys_write+0x53/0xa0 [] system_call_fastpath+0x16/0x1b [<00007f611c9d9300>] 0x7f611c9d92ff Reported-by: Max Gurtuvoy Signed-off-by: Bart Van Assche Reviewed-by: Mike Christie Cc: Signed-off-by: James Bottomley --- include/linux/blk_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c294e3e25e37..a1b25e35ea5f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -181,7 +181,9 @@ enum rq_flag_bits { __REQ_ELVPRIV, /* elevator private data attached */ __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_PREEMPT, /* set for "ide_preempt" requests and also + for requests for which the SCSI "quiesce" + state must be ignored. */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_FLUSH_SEQ, /* request for flush sequence */ -- cgit v1.2.3 From 9b174527e7b756cda9f5d9e541f87b7fec9cfdf0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Feb 2015 14:12:42 -0300 Subject: [media] Add and use IS_REACHABLE macro In the media drivers, the v4l2 core knows about all submodules and calls into them from a common function. However this cannot work if the modules that get called are loadable and the core is built-in. In that case we get drivers/built-in.o: In function `set_type': drivers/media/v4l2-core/tuner-core.c:301: undefined reference to `tea5767_attach' drivers/media/v4l2-core/tuner-core.c:307: undefined reference to `tea5761_attach' drivers/media/v4l2-core/tuner-core.c:349: undefined reference to `tda9887_attach' drivers/media/v4l2-core/tuner-core.c:405: undefined reference to `xc4000_attach' This was working previously, until the IS_ENABLED() macro was used to replace the construct like #if defined(CONFIG_DVB_CX24110) || (defined(CONFIG_DVB_CX24110_MODULE) && defined(MODULE)) with the difference that the new code no longer checks whether it is being built as a loadable module itself. To fix this, this new patch adds an 'IS_REACHABLE' macro, which evaluates true in exactly the condition that was used previously. The downside of this is that this trades an obvious link error for a more subtle runtime failure, but it is clear that the change that introduced the link error was unintentional and it seems better to revert it for now. Also, a similar change was originally created by Trent Piepho and then reverted by teh change to the IS_ENABLED macro. Ideally Kconfig would be used to avoid the case of a broken dependency, or the code restructured in a way to turn around the dependency, but either way would require much larger changes here. Fixes: 7b34be71db53 ("[media] use IS_ENABLED() macro") See-also: c5dec9fb248e ("V4L/DVB (4751): Fix DBV_FE_CUSTOMISE for card drivers compiled into kernel") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- include/linux/kconfig.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index be342b94c640..16cfb3448568 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -43,4 +43,13 @@ */ #define IS_MODULE(option) config_enabled(option##_MODULE) +/* + * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled + * code can call a function defined in code compiled based on CONFIG_FOO. + * This is similar to IS_ENABLED(), but returns false when invoked from + * built-in code when CONFIG_FOO is set to 'm'. + */ +#define IS_REACHABLE(option) (config_enabled(option) || \ + (config_enabled(option##_MODULE) && config_enabled(MODULE))) + #endif /* __LINUX_KCONFIG_H */ -- cgit v1.2.3 From 3bca4cf6022ee2ff1ff2142bed28a6fca00d3a44 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Wed, 8 Apr 2015 12:15:18 +0200 Subject: net: phy: broadcom: Add BCM54616S phy entry Signed-off-by: Alessio Igor Bogani Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928cc1f2..1c9920b38fa1 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 -- cgit v1.2.3 From 18e94a3384368340d438cf67fcaebd72084f34f8 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 25 Mar 2015 14:31:41 +0800 Subject: PCI: Make a shareable UUID for PCI firmware ACPI _DSM The PCI Firmware Specification, r3.0, sec 4.6.4.1.3, defines a single UUID for an ACPI _DSM method to provide device-specific control functions. This _DSM method support several functions, including PCI Express Slot Information, PCI Express Slot Number, PCI Bus Capabilities, etc. Move the UUID definition from pci/pci-label.c, where it could be used only for one function, to pci/pci-acpi.c where it can be shared for all these functions. [bhelgaas: changelog] Signed-off-by: Aaron Lu Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- include/linux/pci-acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 24c7728ca681..3801c704a945 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -77,6 +77,9 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { } static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { } #endif +extern const u8 pci_acpi_dsm_uuid[]; +#define DEVICE_LABEL_DSM 0x07 + #else /* CONFIG_ACPI */ static inline void acpi_pci_add_bus(struct pci_bus *bus) { } static inline void acpi_pci_remove_bus(struct pci_bus *bus) { } -- cgit v1.2.3 From e33caa82e221b0bbeba373b507e3f134dc2efa1a Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 25 Mar 2015 14:37:06 +0800 Subject: PCI/ACPI: Optimize device state transition delays The PCI "ACPI additions for FW latency optimizations" ECN (link below) defines two functions in the PCI _DSM: Function 8, "Reset Delay," applies to the entire hierarchy below a PCI host bridge. If it returns one, the OS may assume that all devices in the hierarchy have already completed power-on reset delays. Function 9, "Device Readiness Durations," applies only to the object where it is located. It returns delay durations required after various events if the device requires less time than the spec requires. Delays from this function take precedence over the Reset Delay function. Add support for Reset Delay and part of Device Readiness Durations. [bhelgaas: changelog, comments] Link: https://www.pcisig.com/specifications/conventional/pci_firmware/ECN_fw_latency_optimization_final.pdf Signed-off-by: Aaron Lu Signed-off-by: Bjorn Helgaas --- include/linux/pci-acpi.h | 2 ++ include/linux/pci.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 3801c704a945..a965efa52152 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -79,6 +79,8 @@ static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { } extern const u8 pci_acpi_dsm_uuid[]; #define DEVICE_LABEL_DSM 0x07 +#define RESET_DELAY_DSM 0x08 +#define FUNCTION_DELAY_DSM 0x09 #else /* CONFIG_ACPI */ static inline void acpi_pci_add_bus(struct pci_bus *bus) { } diff --git a/include/linux/pci.h b/include/linux/pci.h index 211e9da8a7d7..c5aa7b8a40c1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -406,6 +406,7 @@ struct pci_host_bridge { struct list_head windows; /* resource_entry */ void (*release_fn)(struct pci_host_bridge *); void *release_data; + unsigned int ignore_reset_delay:1; /* for entire hierarchy */ }; #define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev) -- cgit v1.2.3 From 1b7047edfcfb257f69e306c9afbab150cb987717 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 18 Mar 2015 11:01:22 +0000 Subject: genirq: Allow the irqchip state of an IRQ to be save/restored There is a number of cases where a kernel subsystem may want to introspect the state of an interrupt at the irqchip level: - When a peripheral is shared between virtual machines, its interrupt state becomes part of the guest's state, and must be switched accordingly. KVM on arm/arm64 requires this for its guest-visible timer - Some GPIO controllers seem to require peeking into the interrupt controller they are connected to to report their internal state This seem to be a pattern that is common enough for the core code to try and support this without too many horrible hacks. Introduce a pair of accessors (irq_get_irqchip_state/irq_set_irqchip_state) to retrieve the bits that can be of interest to another subsystem: pending, active, and masked. - irq_get_irqchip_state returns the state of the interrupt according to a parameter set to IRQCHIP_STATE_PENDING, IRQCHIP_STATE_ACTIVE, IRQCHIP_STATE_MASKED or IRQCHIP_STATE_LINE_LEVEL. - irq_set_irqchip_state similarly sets the state of the interrupt. Signed-off-by: Marc Zyngier Reviewed-by: Bjorn Andersson Tested-by: Bjorn Andersson Cc: linux-arm-kernel@lists.infradead.org Cc: Abhijeet Dharmapurikar Cc: Stephen Boyd Cc: Phong Vo Cc: Linus Walleij Cc: Tin Huynh Cc: Y Vo Cc: Toan Le Cc: Bjorn Andersson Cc: Jason Cooper Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1426676484-21812-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 14 ++++++++++++++ include/linux/irq.h | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 150dde04cf4f..950ae4501826 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -361,6 +361,20 @@ static inline int disable_irq_wake(unsigned int irq) return irq_set_irq_wake(irq, 0); } +/* + * irq_get_irqchip_state/irq_set_irqchip_state specific flags + */ +enum irqchip_irq_state { + IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ + IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ + IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ + IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? */ +}; + +extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, + bool *state); +extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, + bool state); #ifdef CONFIG_IRQ_FORCED_THREADING extern bool force_irqthreads; diff --git a/include/linux/irq.h b/include/linux/irq.h index d09ec7a1243e..77dd2e7f93f4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -30,6 +30,7 @@ struct seq_file; struct module; struct msi_msg; +enum irqchip_irq_state; /* * IRQ line status. @@ -324,6 +325,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * irq_request_resources * @irq_compose_msi_msg: optional to compose message content for MSI * @irq_write_msi_msg: optional to write message content for MSI + * @irq_get_irqchip_state: return the internal state of an interrupt + * @irq_set_irqchip_state: set the internal state of a interrupt * @flags: chip specific flags */ struct irq_chip { @@ -363,6 +366,9 @@ struct irq_chip { void (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg); void (*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg); + int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); + int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); + unsigned long flags; }; -- cgit v1.2.3 From 76184ac17edf3c640390b0eddc3aa7be1095fb9f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 25 Mar 2015 11:27:52 +0000 Subject: mmc: dw_mmc: fix fifo ordering in big endian The dw_mmc driver changes to make the IO accesors endian agnostic did not take into account the fifo accesses do not need to be swapped. To fix this add a mmci_fifo_read/write wrapper to allow these to be passed through the IO without being swapped. Since these are now specific functions, it would be easier just to store the pointer to the fifo registers in the host block instead of the offset to them. So change the host->data_offset to host->fifo_reg (which also means we catch all the places this is read or written). Signed-off-by: Ben Dooks Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 9efc567e3ced..12111993a317 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -44,6 +44,7 @@ struct mmc_data; * struct dw_mci - MMC controller state shared between all slots * @lock: Spinlock protecting the queue and associated data. * @regs: Pointer to MMIO registers. + * @fifo_reg: Pointer to MMIO registers for data FIFO * @sg: Scatterlist entry currently being processed by PIO code, if any. * @sg_miter: PIO mapping scatterlist iterator. * @cur_slot: The slot which is currently using the controller. @@ -79,7 +80,6 @@ struct mmc_data; * @current_speed: Configured rate of the controller. * @num_slots: Number of slots available. * @verid: Denote Version ID. - * @data_offset: Set the offset of DATA register according to VERID. * @dev: Device associated with the MMC controller. * @pdata: Platform data associated with the MMC controller. * @drv_data: Driver specific data for identified variant of the controller @@ -132,6 +132,7 @@ struct dw_mci { spinlock_t lock; spinlock_t irq_lock; void __iomem *regs; + void __iomem *fifo_reg; struct scatterlist *sg; struct sg_mapping_iter sg_miter; @@ -172,7 +173,6 @@ struct dw_mci { u32 num_slots; u32 fifoth_val; u16 verid; - u16 data_offset; struct device *dev; struct dw_mci_board *pdata; const struct dw_mci_drv_data *drv_data; -- cgit v1.2.3 From c0ccf6f99e3a43b87980c9df7da48427885206d0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 9 Apr 2015 13:51:31 +1000 Subject: jump_label: Allow jump labels to be used in assembly To use jump labels in assembly we need the HAVE_JUMP_LABEL define, so we select a fallback version if the toolchain does not support them. Modify linux/jump_label.h so it can be included by assembly files. We also need to add -DCC_HAVE_ASM_GOTO to KBUILD_AFLAGS. Signed-off-by: Anton Blanchard Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: catalin.marinas@arm.com Cc: davem@davemloft.net Cc: heiko.carstens@de.ibm.com Cc: jbaron@akamai.com Cc: linux@arm.linux.org.uk Cc: linuxppc-dev@lists.ozlabs.org Cc: liuj97@gmail.com Cc: mgorman@suse.de Cc: mmarek@suse.cz Cc: mpe@ellerman.id.au Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1428551492-21977-2-git-send-email-anton@samba.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 98f923b6a0ea..f4de473f226b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -45,6 +45,12 @@ * same as using STATIC_KEY_INIT_FALSE. */ +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +# define HAVE_JUMP_LABEL +#endif + +#ifndef __ASSEMBLY__ + #include #include #include @@ -55,7 +61,7 @@ extern bool static_key_initialized; "%s used before call to jump_label_init", \ __func__) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key { atomic_t enabled; @@ -66,13 +72,18 @@ struct static_key { #endif }; -# include -# define HAVE_JUMP_LABEL #else struct static_key { atomic_t enabled; }; -#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ +#endif /* HAVE_JUMP_LABEL */ +#endif /* __ASSEMBLY__ */ + +#ifdef HAVE_JUMP_LABEL +#include +#endif + +#ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_DISABLE = 0, @@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key) } #endif /* _LINUX_JUMP_LABEL_H */ + +#endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From e554a99ee8d09132e80dc467433c9a4df9054645 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Apr 2015 16:36:15 +0200 Subject: mfd: sec: Fix RTC alarm interrupt number on S2MPS11 The RTC on S2MPS11 is the same as S2MPS14. However interrupt numbers of RTC alarms 0 and 1 were inversed between these two devices. So when rtc-s5m driver requested S2MPS14_IRQ_RTCA0 interrupt, it matched to S2MPS11_IRQ_RTCA1, not RTCA0. Fix this by using consistent RTC alarm interrupt numbers and adding a BUILD_BUG_ON for future generations. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/samsung/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index f35af7361b60..667aa40486dd 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -74,8 +74,8 @@ enum s2mps11_irq { S2MPS11_IRQ_MRB, S2MPS11_IRQ_RTC60S, - S2MPS11_IRQ_RTCA0, S2MPS11_IRQ_RTCA1, + S2MPS11_IRQ_RTCA0, S2MPS11_IRQ_SMPL, S2MPS11_IRQ_RTC1S, S2MPS11_IRQ_WTSR, -- cgit v1.2.3 From c8a8585431efba0faaf41167f8f7c27c48307ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vianney=20le=20Cl=C3=A9ment=20de=20Saint-Marcq?= Date: Mon, 30 Mar 2015 10:34:58 +0200 Subject: iio: core: Introduce IIO_CHAN_INFO_CALIBEMISSIVITY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contact-less IR temperature sensors measure the temperature of an object by using its thermal radiation. Surfaces with different emissivity ratios emit different amounts of energy at the same temperature. IIO_CHAN_INFO_CALIBEMISSIVITY allows the user to inform the sensor of the emissivity of the object in front of it, in order to effectively measure its temperature. A device providing such setting is Melexis's MLX90614: http://melexis.com/Assets/IR-sensor-thermometer-MLX90614-Datasheet-5152.aspx. Signed-off-by: Vianney le Clément de Saint-Marcq Cc: Arnout Vandecappelle (Essensium/Mind) Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d86b753e9b30..b1e46ae89aa7 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -43,6 +43,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_CALIBWEIGHT, IIO_CHAN_INFO_DEBOUNCE_COUNT, IIO_CHAN_INFO_DEBOUNCE_TIME, + IIO_CHAN_INFO_CALIBEMISSIVITY, }; enum iio_shared_by { -- cgit v1.2.3 From 820f9f147dcce2602eefd9b575bbbd9ea14f0953 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Apr 2015 16:35:48 -0500 Subject: fs_pin: Allow for the possibility that m_list or s_list go unused. This is needed to support lazily umounting locked mounts. Because the entire unmounted subtree needs to stay together until there are no users with references to any part of the subtree. To support this guarantee that the fs_pin m_list and s_list nodes are initialized by initializing them in init_fs_pin allowing for the possibility that pin_insert_group does not touch them. Further use hlist_del_init in pin_remove so that there is a hlist_unhashed test before the list we attempt to update the previous list item. Signed-off-by: "Eric W. Biederman" --- include/linux/fs_pin.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 9dc4e0384bfb..3886b3bffd7f 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -13,6 +13,8 @@ struct vfsmount; static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) { init_waitqueue_head(&p->wait); + INIT_HLIST_NODE(&p->s_list); + INIT_HLIST_NODE(&p->m_list); p->kill = kill; } -- cgit v1.2.3 From 387d37577fdd05e9472c20885464c2a53b3c945f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 7 Apr 2015 11:07:00 -0700 Subject: PCI: Don't clear ASPM bits when the FADT declares it's unsupported Communications with a hardware vendor confirm that the expected behaviour on systems that set the FADT ASPM disable bit but which still grant full PCIe control is for the OS to leave any BIOS configuration intact and refuse to touch the ASPM bits. This mimics the behaviour of Windows. Signed-off-by: Matthew Garrett Signed-off-by: Bjorn Helgaas --- include/linux/pci-aspm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index 8af4610c2e41..207c561fb40e 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -29,7 +29,6 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pci_disable_link_state(struct pci_dev *pdev, int state); void pci_disable_link_state_locked(struct pci_dev *pdev, int state); -void pcie_clear_aspm(struct pci_bus *bus); void pcie_no_aspm(void); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) @@ -47,9 +46,6 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { } -static inline void pcie_clear_aspm(struct pci_bus *bus) -{ -} static inline void pcie_no_aspm(void) { } -- cgit v1.2.3 From 2963e3f7e8e3465895897a175560210120b932ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Apr 2015 15:54:05 -0600 Subject: blk-mq: cleanup blk_mq_rq_to_pdu() Casting to void and adding the size of the request is "shit code" and only a "crazy monkey on crack" would write that. So lets clean it up. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ebfe707cf722..8210e8797c12 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -230,7 +230,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q); /* * Driver command data is immediately after the request. So subtract request - * size to get back to the original request. + * size to get back to the original request, add request size to get the PDU. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { @@ -238,7 +238,7 @@ static inline struct request *blk_mq_rq_from_pdu(void *pdu) } static inline void *blk_mq_rq_to_pdu(struct request *rq) { - return (void *) rq + sizeof(*rq); + return rq + 1; } #define queue_for_each_hw_ctx(q, hctx, i) \ -- cgit v1.2.3 From 99492c39f39fc2d8c4ae36ecfb88d7de5d8106b5 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 3 Apr 2015 08:57:51 -0400 Subject: earlycon: Fix __earlycon_table stride The compiler and the linker must agree on the alignment of struct earlycon_id; empirical testing and commit 07fca0e57fca92 ("tracing: Properly align linker defined symbols") suggests 32-byte alignment is the LCD. Reported-by: Yinghai Lu Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 34de16840152..025dad9dcde4 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -342,7 +342,7 @@ struct earlycon_device { struct earlycon_id { char name[16]; int (*setup)(struct earlycon_device *, const char *options); -}; +} __aligned(32); extern int setup_earlycon(char *buf); extern int of_setup_earlycon(unsigned long addr, -- cgit v1.2.3 From 2b2190a375d796a5ad9ec557cb51269f36b264d4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 6 Apr 2015 15:38:39 +0300 Subject: i2c: change input parameter to i2c_adapter for prepare/unprepare_recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes type of input parameter for prepare/unprepare_recovery() callbacks from struct i2c_bus_recovery_info * to struct i2c_adapter *. This allows to simplify implementation of these callbacks and avoid type conversations from i2c_bus_recovery_info to i2c_adapter. The i2c_bus_recovery_info can be simply retrieved from struct i2c_adapter which contains pointer on it. There are no users currently, so this is safe to do. Acked-by: Uwe Kleine-König Acked-by: Alexander Sverdlin Signed-off-by: Grygorii Strashko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c5e4bb2c5759..898033f41d76 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -435,8 +435,8 @@ struct i2c_bus_recovery_info { void (*set_scl)(struct i2c_adapter *, int val); int (*get_sda)(struct i2c_adapter *); - void (*prepare_recovery)(struct i2c_bus_recovery_info *bri); - void (*unprepare_recovery)(struct i2c_bus_recovery_info *bri); + void (*prepare_recovery)(struct i2c_adapter *); + void (*unprepare_recovery)(struct i2c_adapter *); /* gpio recovery */ int scl_gpio; -- cgit v1.2.3 From 7ef97e9a312c359a2b32a7b5d918c60f238b69b2 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 6 Apr 2015 15:38:41 +0300 Subject: i2c: davinci: use ICPFUNC to toggle I2C as gpio for bus recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having a board where the I2C bus locks up occasionally made it clear that the bus recovery in the i2c-davinci driver will only work on some boards, because on regular boards, this will only toggle GPIO lines that aren't muxed to the actual pins. The I2C controller on SoCs like da850 (and da830), Keystone 2 has the built-in capability to bit-bang its lines by using the ICPFUNC registers of the i2c controller. Implement the suggested procedure by toggling SCL and checking SDA using the ICPFUNC registers of the I2C controller when present. Allow platforms to indicate the presence of the ICPFUNC registers with a has_pfunc platform data flag and add optional DT property "ti,has-pfunc" to indicate the same in DT. Reviewed-by: Uwe Kleine-König Acked-by: Alexander Sverdlin Tested-by: Michael Lawnick Signed-off-by: Ben Gardiner Signed-off-by: Mike Looijmans [grygorii.strashko@ti.com: combined patches from Ben Gardiner and Mike Looijmans and reimplemented ICPFUNC bus recovery using I2C bus recovery infrastructure] Signed-off-by: Grygorii Strashko Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-davinci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-davinci.h b/include/linux/platform_data/i2c-davinci.h index 2312d197dfb7..89fd34727a24 100644 --- a/include/linux/platform_data/i2c-davinci.h +++ b/include/linux/platform_data/i2c-davinci.h @@ -18,6 +18,7 @@ struct davinci_i2c_platform_data { unsigned int bus_delay; /* post-transaction delay (usec) */ unsigned int sda_pin; /* GPIO pin ID to use for SDA */ unsigned int scl_pin; /* GPIO pin ID to use for SCL */ + bool has_pfunc; /*chip has a ICPFUNC register */ }; /* for board setup code */ -- cgit v1.2.3 From 510022a85839a8409d1e6a519bb86ce71a84f30a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:07:16 -0700 Subject: f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries If f2fs was corrupted with missing dot dentries, it needs to recover them after fsck.f2fs detection. The underlying precedure is: 1. The fsck.f2fs remains F2FS_INLINE_DOTS flag in directory inode, if it detects missing dot dentries. 2. When f2fs looks up the corrupted directory, it triggers f2fs_add_link with proper inode numbers and their dot and dotdot names. 3. Once f2fs recovers the directory without errors, it removes F2FS_INLINE_DOTS finally. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 502f28cfb78e..591f8c3ef410 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -178,6 +178,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -- cgit v1.2.3 From b9f28d863594c429e1df35a0474d2663ca28b307 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 5 Mar 2015 18:47:01 -0800 Subject: sd, mmc, virtio_blk, string_helpers: fix block size units The current string_get_size() overflows when the device size goes over 2^64 bytes because the string helper routine computes the suffix from the size in bytes. However, the entirety of Linux thinks in terms of blocks, not bytes, so this will artificially induce an overflow on very large devices. Fix this by making the function string_get_size() take blocks and the block size instead of bytes. This should allow us to keep working until the current SCSI standard overflows. Also fix virtio_blk and mmc (both of which were also artificially multiplying by the block size to pass a byte side to string_get_size()). The mathematics of this is pretty simple: we're taking a product of size in blocks (S) and block size (B) and trying to re-express this in exponential form: S*B = R*N^E (where N, the exponent is either 1000 or 1024) and R < N. Mathematically, S = RS*N^ES and B=RB*N^EB, so if RS*RB < N it's easy to see that S*B = RS*RB*N^(ES+EB). However, if RS*BS > N, we can see that this can be re-expressed as RS*BS = R*N (where R = RS*BS/N < N) so the whole exponent becomes R*N^(ES+EB+1) [jejb: fix incorrect 32 bit do_div spotted by kbuild test robot ] Acked-by: Ulf Hansson Reviewed-by: Andrew Morton Signed-off-by: James Bottomley --- include/linux/string_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 657571817260..263328063730 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -10,7 +10,7 @@ enum string_size_units { STRING_UNITS_2, /* use binary powers of 2^10 */ }; -void string_get_size(u64 size, enum string_size_units units, +void string_get_size(u64 size, u64 blk_size, enum string_size_units units, char *buf, int len); #define UNESCAPE_SPACE 0x01 -- cgit v1.2.3 From 820ad9752c232239d3278eafe71c2c251ae233d3 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 18 Mar 2015 14:53:17 +0100 Subject: clk: clk-gpio-gate: Fix active low The active low flag in the DT cell is currently ignored. This occurs because of_get_named_gpio_flags() does not apply the flags to the underlying struct gpio_desc so the test in clk_register_gpio_gate() was bogus. Note that this patch changes the internal kernel API for clk_register_gpio_gate() but there are currently no other users. Signed-off-by: Martin Fuzzey Acked-by: Jyri Sarha Signed-off-by: Michael Turquette --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5591ea71a8d1..df695313f975 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -541,7 +541,7 @@ struct clk_gpio { extern const struct clk_ops clk_gpio_gate_ops; struct clk *clk_register_gpio_gate(struct device *dev, const char *name, - const char *parent_name, struct gpio_desc *gpio, + const char *parent_name, unsigned gpio, bool active_low, unsigned long flags); void of_gpio_clk_gate_setup(struct device_node *node); -- cgit v1.2.3 From 01a3d796813d6302af9f828f34b73d21a4b96c9a Mon Sep 17 00:00:00 2001 From: Vlad Zolotarov Date: Mon, 30 Mar 2015 21:35:23 +0300 Subject: if_link: Add an additional parameter to ifla_vf_info for RSS querying Add configuration setting for drivers to allow/block an RSS Redirection Table and a Hash Key querying for discrete VFs. On some devices VF share the mentioned above information with PF and querying it may adduce a theoretical security risk. We want to let a system administrator to decide if he/she wants to take this risk or not. Signed-off-by: Vlad Zolotarov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 119130e9298b..da4929927f69 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -14,5 +14,6 @@ struct ifla_vf_info { __u32 linkstate; __u32 min_tx_rate; __u32 max_tx_rate; + __u32 rss_query_en; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf6d9df34d7b..13acb3d8ecdd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -878,6 +878,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); + * + * Enable or disable the VF ability to query its RSS Redirection Table and + * Hash Key. This is needed since on some devices VF share this information + * with PF and querying it may adduce a theoretical security risk. + * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) * Called to setup 'tc' number of traffic classes in the net device. This @@ -1099,6 +1104,9 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_set_vf_rss_query_en)( + struct net_device *dev, + int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); -- cgit v1.2.3 From d8725c86aebaf3516e220760aaf5fefc73825188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Dec 2014 00:02:50 -0500 Subject: get rid of the size argument of sock_sendmsg() it's equal to iov_iter_count(&msg->msg_iter) in all cases Signed-off-by: Al Viro --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index e74114bcca68..738ea48be889 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -211,7 +211,7 @@ int sock_create(int family, int type, int proto, struct socket **res); int sock_create_kern(int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); +int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); -- cgit v1.2.3 From 01e97e6517053d7c0b9af5248e944a9209909cf5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2014 21:39:31 -0500 Subject: new helper: msg_data_left() convert open-coded instances Signed-off-by: Al Viro --- include/linux/socket.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index c9852ef7e317..5bf59c8493b7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -139,6 +139,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ -- cgit v1.2.3 From f6194213cbe871341cd5dfcfe31b4de78ef9503f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 26 Mar 2015 13:07:29 +0000 Subject: clk: at91: change to using endian agnositc IO Change to using endian agnostic _relaxed IO accessors instead of __raw Signed-off-by: Ben Dooks -- CC: Andrew Victor CC: Nicolas Ferre CC: Jean-Christophe Plagniol-Villard CC: Mike Turquette (maintainer:COMMON CLK FRAMEWORK) CC: Stephen Boyd (maintainer:COMMON CLK FRAMEWORK) CC: linux-kernel@vger.kernel.org (open list:COMMON CLK FRAMEWORK) Signed-off-by: Michael Turquette --- include/linux/clk/at91_pmc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index c8e3b3d1eded..7669f7618f39 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -20,10 +20,10 @@ extern void __iomem *at91_pmc_base; #define at91_pmc_read(field) \ - __raw_readl(at91_pmc_base + field) + readl_relaxed(at91_pmc_base + field) #define at91_pmc_write(field, value) \ - __raw_writel(value, at91_pmc_base + field) + writel_relaxed(value, at91_pmc_base + field) #else .extern at91_pmc_base #endif -- cgit v1.2.3 From fd2f7cb5bcac58b63717cd45366bff9a6ab961c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Feb 2015 20:07:13 -0500 Subject: kill struct filename.separate just make const char iname[] the last member and compare name->name with name->iname instead of checking name->separate We need to make sure that out-of-line name doesn't end up allocated adjacent to struct filename refering to it; fortunately, it's easy to achieve - just allocate that struct filename with one byte in ->iname[], so that ->iname[0] will be inside the same object and thus have an address different from that of out-of-line name [spotted by Boqun Feng ] Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b4d71b5e1ff2..d70e333988f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2144,7 +2144,7 @@ struct filename { const __user char *uptr; /* original userland pointer */ struct audit_names *aname; int refcnt; - bool separate; /* should "name" be freed? */ + const char iname[]; }; extern long vfs_truncate(struct path *, loff_t); -- cgit v1.2.3 From 3f7036a071b879da017eddaedb10fba173fdf1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Mar 2015 19:28:30 -0400 Subject: switch security_inode_getattr() to struct path * Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..4e14e3d6309f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1556,7 +1556,7 @@ struct security_operations { int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); + int (*inode_getattr) (const struct path *path); int (*inode_setxattr) (struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void (*inode_post_setxattr) (struct dentry *dentry, const char *name, @@ -1843,7 +1843,7 @@ int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); -int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); +int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); void security_inode_post_setxattr(struct dentry *dentry, const char *name, @@ -2259,8 +2259,7 @@ static inline int security_inode_setattr(struct dentry *dentry, return 0; } -static inline int security_inode_getattr(struct vfsmount *mnt, - struct dentry *dentry) +static inline int security_inode_getattr(const struct path *path) { return 0; } -- cgit v1.2.3 From 171a02032bf1e1bb35442a38d6e25e0dcbb85c63 Mon Sep 17 00:00:00 2001 From: Anton Altaparmakov Date: Wed, 11 Mar 2015 10:43:31 -0400 Subject: VFS: Add iov_iter_fault_in_multipages_readable() simillar to iov_iter_fault_in_readable() but differs in that it is not limited to faulting in the first iovec and instead faults in "bytes" bytes iterating over the iovecs as necessary. Also, instead of only faulting in the first and last page of the range, all pages are faulted in. This function is needed by NTFS when it does multi page file writes. Signed-off-by: Anton Altaparmakov Signed-off-by: Al Viro --- include/linux/uio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 71880299ed48..a69ed25af07f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,6 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3 From 493c84c0722ac991a96df2fd6c7798b2126b626c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Apr 2015 15:06:43 -0400 Subject: new helper: __vfs_write() Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 20fe15fe236d..4a1cb00bd805 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1639,6 +1639,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec **ret_pointer); extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); +extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit v1.2.3 From 5d5d568975307877e9195f5305f4240e506a2807 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Apr 2015 15:41:18 -0400 Subject: make new_sync_{read,write}() static All places outside of core VFS that checked ->read and ->write for being NULL or called the methods directly are gone now, so NULL {read,write} with non-NULL {read,write}_iter will do the right thing in all cases. Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a1cb00bd805..cade1304d27b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2576,8 +2576,6 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos); -- cgit v1.2.3 From 9a219bc70b54523c8f14a3c957179854785ee97c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Apr 2015 22:10:20 -0400 Subject: kill do_sync_read/do_sync_write all remaining instances of aio_{read,write} (all 4 of them) have explicit ->read and ->write resp.; do_sync_read/do_sync_write is never called by __vfs_read/__vfs_write anymore and no other users had been left. Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index cade1304d27b..83e122c1a902 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2574,8 +2574,6 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos); -- cgit v1.2.3 From 8436318205b9f29e45db88850ec60e326327e241 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Apr 2015 01:14:53 -0400 Subject: ->aio_read and ->aio_write removed no remaining users Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 83e122c1a902..f1e3f65255a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1562,8 +1562,6 @@ struct file_operations { loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); -- cgit v1.2.3 From bd8e0ff956456ad9071dbb6c2ed7d33bd22fc216 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Mar 2015 14:04:02 -0700 Subject: new helper: iov_iter_rw() Get either READ or WRITE out of iter->type. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/uio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 15f11fb9fff6..8b01e1c3c614 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -111,6 +111,14 @@ static inline bool iter_is_iovec(struct iov_iter *i) return !(i->type & (ITER_BVEC | ITER_KVEC)); } +/* + * Get one of READ or WRITE out of iter->type without any other flags OR'd in + * with it. + * + * The ?: is just for type safety. + */ +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) + /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value -- cgit v1.2.3 From 17f8c842d24ac054e4212c82b5bd6ae455a334f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:50 -0700 Subject: Remove rw from {,__,do_}blockdev_direct_IO() Most filesystems call through to these at some point, so we'll start here. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/fs.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f1e3f65255a8..c67b6de8be33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2634,16 +2634,18 @@ enum { void dio_end_io(struct bio *bio, int error); -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags); - -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct iov_iter *iter, loff_t offset, - get_block_t get_block) -{ - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags); + +static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, + struct inode *inode, + struct iov_iter *iter, loff_t offset, + get_block_t get_block) +{ + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } -- cgit v1.2.3 From a95cd6311512bd954e88684eb39373f7f4b0a984 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:51 -0700 Subject: Remove rw from dax_{do_,}io() And use iov_iter_rw() instead. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c67b6de8be33..295bc589fe1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2609,8 +2609,8 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, - loff_t, get_block_t, dio_iodone_t, int flags); +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -- cgit v1.2.3 From 22c6186ecea0be9eff1c399298ad36e94a59995f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:53 -0700 Subject: direct_IO: remove rw from a_ops->direct_IO() Now that no one is using rw, remove it completely. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- include/linux/nfs_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 295bc589fe1b..72e3759de8c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -383,7 +383,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdc..3d1b0d2fe55e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -447,7 +447,7 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); -- cgit v1.2.3 From 0fa6b005afdb3152ce85df963302e59b61115f9b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Apr 2015 04:05:48 -0400 Subject: generic_write_checks(): drop isblk argument all remaining callers are passing 0; some just obscure that fact. Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72e3759de8c3..492948ea4c9b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); +int generic_write_checks(struct file *file, loff_t *pos, size_t *count); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit v1.2.3 From 3309dd04cbcd2cdad168485af5cf3576b5051e49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 12:55:47 -0400 Subject: switch generic_write_checks() to iocb and iter ... returning -E... upon error and amount of data left in iter after (possible) truncation upon success. Note, that normal case gives a non-zero (positive) return value, so any tests for != 0 _must_ be updated. Signed-off-by: Al Viro Conflicts: fs/ext4/file.c --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c7b21db7782f..b4aa400ac723 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count); +extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -- cgit v1.2.3 From 2ba48ce513c4e545318d22b138861d5876edf906 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 13:52:01 -0400 Subject: mirror O_APPEND and O_DIRECT into iocb->ki_flags ... avoiding write_iter/fcntl races. Signed-off-by: Al Viro --- include/linux/fs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b4aa400ac723..b1d7db28c13c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ struct address_space; struct writeback_control; #define IOCB_EVENTFD (1 << 0) +#define IOCB_APPEND (1 << 1) +#define IOCB_DIRECT (1 << 2) struct kiocb { struct file *ki_filp; @@ -329,10 +331,13 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } +static inline int iocb_flags(struct file *file); + static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, + .ki_flags = iocb_flags(filp), }; } @@ -2779,6 +2784,16 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); } +static inline int iocb_flags(struct file *file) +{ + int res = 0; + if (file->f_flags & O_APPEND) + res |= IOCB_APPEND; + if (io_is_direct(file)) + res |= IOCB_DIRECT; + return res; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; -- cgit v1.2.3 From 9058f3b326dbe8cd2ebea7f3cfe367b0d101039b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sat, 11 Apr 2015 21:45:22 +0200 Subject: Remove rest of exec domains. It is gone from all archs, now we can remove the final bits. Signed-off-by: Richard Weinberger --- include/linux/personality.h | 40 +--------------------------------------- include/linux/sched.h | 6 ------ 2 files changed, 1 insertion(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/personality.h b/include/linux/personality.h index 646c0a7d50fa..aeb7892b2468 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -3,52 +3,14 @@ #include - -/* - * Handling of different ABIs (personalities). - */ - -struct exec_domain; -struct pt_regs; - -extern int register_exec_domain(struct exec_domain *); -extern int unregister_exec_domain(struct exec_domain *); -extern int __set_personality(unsigned int); - - -/* - * Description of an execution domain. - * - * The first two members are refernced from assembly source - * and should stay where they are unless explicitly needed. - */ -typedef void (*handler_t)(int, struct pt_regs *); - -struct exec_domain { - const char *name; /* name of the execdomain */ - handler_t handler; /* handler for syscalls */ - unsigned char pers_low; /* lowest personality */ - unsigned char pers_high; /* highest personality */ - unsigned long *signal_map; /* signal mapping */ - unsigned long *signal_invmap; /* reverse signal mapping */ - struct map_segment *err_map; /* error mapping */ - struct map_segment *socktype_map; /* socket type mapping */ - struct map_segment *sockopt_map; /* socket option mapping */ - struct map_segment *af_map; /* address family mapping */ - struct module *module; /* module context of the ed. */ - struct exec_domain *next; /* linked list (internal) */ -}; - /* * Return the base personality without flags. */ #define personality(pers) (pers & PER_MASK) - /* * Change personality of the currently running process. */ -#define set_personality(pers) \ - ((current->personality == (pers)) ? 0 : __set_personality(pers)) +#define set_personality(pers) (current->personality = (pers)) #endif /* _LINUX_PERSONALITY_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a419b65770d6..14d9117ac463 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -125,7 +125,6 @@ struct sched_attr { u64 sched_period; }; -struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio_list; @@ -2288,11 +2287,6 @@ extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); -/* - * The default (Linux) execution domain. - */ -extern struct exec_domain default_exec_domain; - union thread_union { struct thread_info thread_info; unsigned long stack[THREAD_SIZE/sizeof(long)]; -- cgit v1.2.3 From 00df35f991914db6b8bde8cf09808e19a9cffc3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 12 Apr 2015 08:06:55 -0700 Subject: cpu: Defer smpboot kthread unparking until CPU known to scheduler Currently, smpboot_unpark_threads() is invoked before the incoming CPU has been added to the scheduler's runqueue structures. This might potentially cause the unparked kthread to run on the wrong CPU, since the correct CPU isn't fully set up yet. That causes a sporadic, hard to debug boot crash triggering on some systems, reported by Borislav Petkov, and bisected down to: 2a442c9c6453 ("x86: Use common outgoing-CPU-notification code") This patch places smpboot_unpark_threads() in a CPU hotplug notifier with priority set so that these kthreads are unparked just after the CPU has been added to the runqueues. Reported-and-tested-by: Borislav Petkov Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d028721748d4..091badf0f6ba 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -73,6 +73,7 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, + CPU_PRI_SMPBOOT = 9, /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_DOWN = -5, @@ -165,6 +166,7 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) } #endif +void smpboot_thread_init(void); int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); -- cgit v1.2.3 From 590ee7dbd569a012df705a5204fc5f1066f52b8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 13 Apr 2015 10:04:59 +0200 Subject: cpu: Provide smpboot_thread_init() on !CONFIG_SMP kernels as well Now that we are using smpboot_thread_init() in init/main.c as well, provide it for !CONFIG_SMP as well. This addresses a !CONFIG_SMP build failure. Cc: Paul E. McKenney Cc: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 091badf0f6ba..c0fb6b1b4712 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -214,6 +214,10 @@ static inline void cpu_notifier_register_done(void) { } +static inline void smpboot_thread_init(void) +{ +} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; -- cgit v1.2.3 From 9e1a27ea42691429e31f158cce6fc61bc79bb2e9 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 13 Apr 2015 21:03:49 +0930 Subject: virtio_ring: Update weak barriers to use dma_wmb/rmb This change makes it so that instead of using smp_wmb/rmb which varies depending on the kernel configuration we can can use dma_wmb/rmb which for most architectures should be equal to or slightly more strict than smp_wmb/rmb. The advantage to this is that these barriers are available to uniprocessor builds as well so the performance should improve under such a configuration. Signed-off-by: Alexander Duyck Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 67e06fe18c03..8e50888a6d59 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -21,19 +21,20 @@ * actually quite cheap. */ -#ifdef CONFIG_SMP static inline void virtio_mb(bool weak_barriers) { +#ifdef CONFIG_SMP if (weak_barriers) smp_mb(); else +#endif mb(); } static inline void virtio_rmb(bool weak_barriers) { if (weak_barriers) - smp_rmb(); + dma_rmb(); else rmb(); } @@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers) static inline void virtio_wmb(bool weak_barriers) { if (weak_barriers) - smp_wmb(); + dma_wmb(); else wmb(); } -#else -static inline void virtio_mb(bool weak_barriers) -{ - mb(); -} - -static inline void virtio_rmb(bool weak_barriers) -{ - rmb(); -} - -static inline void virtio_wmb(bool weak_barriers) -{ - wmb(); -} -#endif struct virtio_device; struct virtqueue; -- cgit v1.2.3 From e2c6544829f8df396a0a233c86d5ee78f405ffef Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 10 Apr 2015 15:52:37 +0200 Subject: e1000e: Move pm_qos_req to e1000e adapter e1000e is the only driver requiring pm_qos_req, instead of causing every device to waste up to 240 bytes. Allocate it for the specific driver. Signed-off-by: Thomas Graf Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 13acb3d8ecdd..733d24f16cf8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,7 +1751,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 14ffbbb8dab670393a8718adb7b0a385b23fa7bc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 10 Apr 2015 15:52:38 +0200 Subject: net_device: Reorder members to fill holes Some trivial reorders while preserving the RX/TX cache lines split to fill a couple of holes. Signed-off-by: Thomas Graf Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 733d24f16cf8..b5679aed660b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1313,6 +1313,8 @@ enum netdev_priv_flags { * @base_addr: Device I/O address * @irq: Device IRQ number * + * @carrier_changes: Stats to monitor carrier on<->off transitions + * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry, that is used for polling napi devices @@ -1346,8 +1348,6 @@ enum netdev_priv_flags { * @tx_dropped: Dropped packets by core network, * do not use this in drivers * - * @carrier_changes: Stats to monitor carrier on<->off transitions - * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see for details. @@ -1390,14 +1390,14 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc: unicast mac addresses - * @mc: multicast mac addresses - * @dev_addrs: list of device hw addresses - * @queues_kset: Group of all Kobjects in the Tx and RX queues * @uc_promisc: Counter, that indicates, that promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() + * @uc: unicast mac addresses + * @mc: multicast mac addresses + * @dev_addrs: list of device hw addresses + * @queues_kset: Group of all Kobjects in the Tx and RX queues * @promiscuity: Number of times, the NIC is told to work in * Promiscuous mode, if it becomes 0 the NIC will * exit from working in Promiscuous mode @@ -1427,6 +1427,12 @@ enum netdev_priv_flags { * @ingress_queue: XXX: need comments on this one * @broadcast: hw bcast address * + * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, + * indexed by RX queue number. Assigned by driver. + * This must only be set if the ndo_rx_flow_steer + * operation is defined + * @index_hlist: Device index hash chain + * * @_tx: Array of TX queues * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time * @real_num_tx_queues: Number of TX queues currently active in device @@ -1436,11 +1442,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, - * indexed by RX queue number. Assigned by driver. - * This must only be set if the ndo_rx_flow_steer - * operation is defined - * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1448,7 +1449,6 @@ enum netdev_priv_flags { * * @pcpu_refcnt: Number of references to this device * @todo_list: Delayed register/unregister - * @index_hlist: Device index hash chain * @link_watch_list: XXX: need comments on this one * * @reg_state: Register/unregister state machine @@ -1515,6 +1515,8 @@ struct net_device { unsigned long base_addr; int irq; + atomic_t carrier_changes; + /* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not @@ -1555,8 +1557,6 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; - atomic_t carrier_changes; - #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; struct iw_public_data * wireless_data; @@ -1596,6 +1596,8 @@ struct net_device { unsigned short dev_id; unsigned short dev_port; spinlock_t addr_list_lock; + unsigned char name_assign_type; + bool uc_promisc; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; @@ -1603,10 +1605,6 @@ struct net_device { #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif - - unsigned char name_assign_type; - - bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1653,7 +1651,10 @@ struct net_device { struct netdev_queue __rcu *ingress_queue; unsigned char broadcast[MAX_ADDR_LEN]; - +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rx_cpu_rmap; +#endif + struct hlist_node index_hlist; /* * Cache lines mostly used on transmit path @@ -1664,13 +1665,11 @@ struct net_device { struct Qdisc *qdisc; unsigned long tx_queue_len; spinlock_t tx_global_lock; + int watchdog_timeo; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rx_cpu_rmap; -#endif /* These may be needed for future network-power-down code. */ @@ -1680,13 +1679,11 @@ struct net_device { */ unsigned long trans_start; - int watchdog_timeo; struct timer_list watchdog_timer; int __percpu *pcpu_refcnt; struct list_head todo_list; - struct hlist_node index_hlist; struct list_head link_watch_list; enum { NETREG_UNINITIALIZED=0, -- cgit v1.2.3 From 4577139b2dabf58973d59d157aae4ddd3bde863a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2015 23:07:54 +0200 Subject: net: use jump label patching for ingress qdisc in __netif_receive_skb_core Even if we make use of classifier and actions from the egress path, we're going into handle_ing() executing additional code on a per-packet cost for ingress qdisc, just to realize that nothing is attached on ingress. Instead, this can just be blinded out as a no-op entirely with the use of a static key. On input fast-path, we already make use of static keys in various places, e.g. skb time stamping, in RPS, etc. It makes sense to not waste time when we're assured that no ingress qdisc is attached anywhere. Enabling/disabling of that code path is being done via two helpers, namely net_{inc,dec}_ingress_queue(), that are being invoked under RTNL mutex when a ingress qdisc is being either initialized or destructed. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5db76a32fcab..2da5d1081ad9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -77,7 +77,20 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } -extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); +struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); + +#ifdef CONFIG_NET_CLS_ACT +void net_inc_ingress_queue(void); +void net_dec_ingress_queue(void); +#else +static inline void net_inc_ingress_queue(void) +{ +} + +static inline void net_dec_ingress_queue(void) +{ +} +#endif extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3 From 24477e57412a7a7dea62637ac990bc5c1cff0665 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 8 Apr 2015 19:41:40 +0200 Subject: uapi: ebtables: don't include linux/if.h linux/if.h creates conflicts in userspace with net/if.h By using it here we force userspace to use linux/if.h while net/if.h may be needed. Note that: include/linux/netfilter_ipv4/ip_tables.h and include/linux/netfilter_ipv6/ip6_tables.h don't include linux/if.h and they also refer to IFNAMSIZ, so they are expecting userspace to include use net/if.h from the client program. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 34e7a2b7f867..f1bd3962e6b6 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -12,9 +12,10 @@ #ifndef __LINUX_BRIDGE_EFF_H #define __LINUX_BRIDGE_EFF_H +#include +#include #include - /* return values for match() functions */ #define EBT_MATCH 0 #define EBT_NOMATCH 1 -- cgit v1.2.3 From 0392d099ab29767b8d7d4d1d29e8020f8abe943f Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 13 Apr 2015 00:52:35 +0200 Subject: netlink: Fix portid type in netlink_notify portid is an unsigned integer. Fix netlink_notify to match all other portid user in the kernel. Signed-off-by: Richard Weinberger Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 02fc86d2348e..6835c1279df7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -134,7 +134,7 @@ struct netlink_callback { struct netlink_notify { struct net *net; - int portid; + u32 portid; int protocol; }; -- cgit v1.2.3 From 84d56e66b9b4a646f04ec30696ca1aeea5e654d5 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Tue, 14 Apr 2015 15:43:55 -0700 Subject: watchdog: new definitions and variables, initialization The hardlockup and softockup had always been tied together. Due to the request of KVM folks, they had a need to have one enabled but not the other. Internally rework the code to split things apart more cleanly. There is a bunch of churn here, but the end result should be code that should be easier to maintain and fix without knowing the internals of what is going on. This patch (of 9): Introduce new definitions and variables to separate the user interface in /proc/sys/kernel from the internal run state of the lockup detectors. The internal run state is represented by two bits in a new variable that is named 'watchdog_enabled'. This helps simplify the code, for example: - In order to check if any of the two lockup detectors is enabled, it is sufficient to check if 'watchdog_enabled' is not zero. - In order to enable/disable one or both lockup detectors, it is sufficient to set/clear one or both bits in 'watchdog_enabled'. - Concurrent updates of 'watchdog_enabled' need not be synchronized via a spinlock or a mutex. Updates can either be atomic or concurrency can be detected by using 'cmpxchg'. Signed-off-by: Ulrich Obergfell Signed-off-by: Don Zickus Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9b2022ab4d85..3885a7d12bd2 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -68,6 +68,8 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); +extern int nmi_watchdog_enabled; +extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; extern int sysctl_softlockup_all_cpu_backtrace; -- cgit v1.2.3 From 83a80a39075a9ded23df1e26a4b617c289077630 Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Tue, 14 Apr 2015 15:44:08 -0700 Subject: watchdog: introduce separate handlers for parameters in /proc/sys/kernel Separate handlers for each watchdog parameter in /proc/sys/kernel replace the proc_dowatchdog() function. Three of those handlers merely call proc_watchdog_common() with one different argument. Signed-off-by: Ulrich Obergfell Signed-off-by: Don Zickus Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 3885a7d12bd2..5b5450585b8a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -74,6 +74,14 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern int sysctl_softlockup_all_cpu_backtrace; struct ctl_table; +extern int proc_watchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); +extern int proc_nmi_watchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); +extern int proc_soft_watchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); +extern int proc_watchdog_thresh(struct ctl_table *, int , + void __user *, size_t *, loff_t *); extern int proc_dowatchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif -- cgit v1.2.3 From 195daf665a6299de98a4da3843fed2dd9de19d3a Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Tue, 14 Apr 2015 15:44:13 -0700 Subject: watchdog: enable the new user interface of the watchdog mechanism With the current user interface of the watchdog mechanism it is only possible to disable or enable both lockup detectors at the same time. This series introduces new kernel parameters and changes the semantics of some existing kernel parameters, so that the hard lockup detector and the soft lockup detector can be disabled or enabled individually. With this series applied, the user interface is as follows. - parameters in /proc/sys/kernel . soft_watchdog This is a new parameter to control and examine the run state of the soft lockup detector. . nmi_watchdog The semantics of this parameter have changed. It can now be used to control and examine the run state of the hard lockup detector. . watchdog This parameter is still available to control the run state of both lockup detectors at the same time. If this parameter is examined, it shows the logical OR of soft_watchdog and nmi_watchdog. . watchdog_thresh The semantics of this parameter are not affected by the patch. - kernel command line parameters . nosoftlockup The semantics of this parameter have changed. It can now be used to disable the soft lockup detector at boot time. . nmi_watchdog=0 or nmi_watchdog=1 Disable or enable the hard lockup detector at boot time. The patch introduces '=1' as a new option. . nowatchdog The semantics of this parameter are not affected by the patch. It is still available to disable both lockup detectors at boot time. Also, remove the proc_dowatchdog() function which is no longer needed. [dzickus@redhat.com: wrote changelog] [dzickus@redhat.com: update documentation for kernel params and sysctl] Signed-off-by: Ulrich Obergfell Signed-off-by: Don Zickus Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5b5450585b8a..0426357297d5 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -82,8 +82,6 @@ extern int proc_soft_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); -extern int proc_dowatchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); #endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI -- cgit v1.2.3 From 692297d8f96887f836d9049a653ed05a71cf48fb Mon Sep 17 00:00:00 2001 From: Ulrich Obergfell Date: Tue, 14 Apr 2015 15:44:19 -0700 Subject: watchdog: introduce the hardlockup_detector_disable() function Have kvm_guest_init() use hardlockup_detector_disable() instead of watchdog_enable_hardlockup_detector(false). Remove the watchdog_hardlockup_detector_is_enabled() and the watchdog_enable_hardlockup_detector() function which are no longer needed. Signed-off-by: Ulrich Obergfell Signed-off-by: Don Zickus Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0426357297d5..3d46fb4708e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -25,16 +25,11 @@ static inline void touch_nmi_watchdog(void) #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void watchdog_enable_hardlockup_detector(bool val); -extern bool watchdog_hardlockup_detector_is_enabled(void); +extern void hardlockup_detector_disable(void); #else -static inline void watchdog_enable_hardlockup_detector(bool val) +static inline void hardlockup_detector_disable(void) { } -static inline bool watchdog_hardlockup_detector_is_enabled(void) -{ - return true; -} #endif /* -- cgit v1.2.3 From 124dee09f0669b92cc0073b00984d53541ca0884 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:44:28 -0700 Subject: mm, slab: correct config option in comment CONFIG_SLAB_DEBUG doesn't exist, CONFIG_DEBUG_SLAB does. Signed-off-by: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 76f1feeabd38..ffd24c830151 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -18,7 +18,7 @@ /* * Flags to pass to kmem_cache_create(). - * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set. + * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. */ #define SLAB_DEBUG_FREE 0x00000100UL /* DEBUG: Perform (expensive) checks on free */ #define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ -- cgit v1.2.3 From 84d33df279e0380995b0e03fb8aad04cef2bc29f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:44:37 -0700 Subject: mm: rename FOLL_MLOCK to FOLL_POPULATE After commit a1fde08c74e9 ("VM: skip the stack guard page lookup in get_user_pages only for mlock") FOLL_MLOCK has lost its original meaning: we don't necessarily mlock the page if the flags is set -- we also take VM_LOCKED into consideration. Since we use the same codepath for __mm_populate(), let's rename FOLL_MLOCK to FOLL_POPULATE. Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Acked-by: David Rientjes Cc: Michel Lespinasse Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 47a93928b90f..cccbbba12b9d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2109,7 +2109,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ -#define FOLL_MLOCK 0x40 /* mark page as mlocked */ +#define FOLL_POPULATE 0x40 /* fault in page */ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ -- cgit v1.2.3 From 30467e0b3be83c286d60039f8267dd421128ca74 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:45:11 -0700 Subject: mm, hotplug: fix concurrent memory hot-add deadlock There's a deadlock when concurrently hot-adding memory through the probe interface and switching a memory block from offline to online. When hot-adding memory via the probe interface, add_memory() first takes mem_hotplug_begin() and then device_lock() is later taken when registering the newly initialized memory block. This creates a lock dependency of (1) mem_hotplug.lock (2) dev->mutex. When switching a memory block from offline to online, dev->mutex is first grabbed in device_online() when the write(2) transitions an existing memory block from offline to online, and then online_pages() will take mem_hotplug_begin(). This creates a lock inversion between mem_hotplug.lock and dev->mutex. Vitaly reports that this deadlock can happen when kworker handling a probe event races with systemd-udevd switching a memory block's state. This patch requires the state transition to take mem_hotplug_begin() before dev->mutex. Hot-adding memory via the probe interface creates a memory block while holding mem_hotplug_begin(), there is no way to take dev->mutex first in this case. online_pages() and offline_pages() are only called when transitioning memory block state. We now require that mem_hotplug_begin() is taken before calling them -- this requires exporting the mem_hotplug_begin() and mem_hotplug_done() to generic code. In all hot-add and hot-remove cases, mem_hotplug_begin() is done prior to device_online(). This is all that is needed to avoid the deadlock. Signed-off-by: David Rientjes Reported-by: Vitaly Kuznetsov Tested-by: Vitaly Kuznetsov Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: "K. Y. Srinivasan" Cc: Yasuaki Ishimatsu Cc: Tang Chen Cc: Vlastimil Babka Cc: Zhang Zhen Cc: Vladimir Davydov Cc: Wang Nan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f1a41951df9..6ffa0ac7f7d6 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -192,6 +192,9 @@ extern void get_page_bootmem(unsigned long ingo, struct page *page, void get_online_mems(void); void put_online_mems(void); +void mem_hotplug_begin(void); +void mem_hotplug_done(void); + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off @@ -231,6 +234,9 @@ static inline int try_online_node(int nid) static inline void get_online_mems(void) {} static inline void put_online_mems(void) {} +static inline void mem_hotplug_begin(void) {} +static inline void mem_hotplug_done(void) {} + #endif /* ! CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE -- cgit v1.2.3 From b9ea25152e56365ce149b9a39637cd7a16eec556 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Apr 2015 15:45:27 -0700 Subject: page_writeback: clean up mess around cancel_dirty_page() This patch replaces cancel_dirty_page() with a helper function account_page_cleaned() which only updates counters. It's called from truncate_complete_page() and from try_to_free_buffers() (hack for ext3). Page is locked in both cases, page-lock protects against concurrent dirtiers: see commit 2d6d7f982846 ("mm: protect set_page_dirty() from ongoing truncation"). Delete_from_page_cache() shouldn't be called for dirty pages, they must be handled by caller (either written or truncated). This patch treats final dirty accounting fixup at the end of __delete_from_page_cache() as a debug check and adds WARN_ON_ONCE() around it. If something removes dirty pages without proper handling that might be a bug and unwritten data might be lost. Hugetlbfs has no dirty pages accounting, ClearPageDirty() is enough here. cancel_dirty_page() in nfs_wb_page_cancel() is redundant. This is helper for nfs_invalidate_page() and it's called only in case complete invalidation. The mess was started in v2.6.20 after commits 46d2277c796f ("Clean up and make try_to_free_buffers() not race with dirty pages") and 3e67c0987d75 ("truncate: clear page dirtiness before running try_to_free_buffers()") first was reverted right in v2.6.20 in commit ecdfc9787fe5 ("Resurrect 'try_to_free_buffers()' VM hackery"), second in v2.6.25 commit a2b345642f53 ("Fix dirty page accounting leak with ext3 data=journal"). Custom fixes were introduced between these points. NFS in v2.6.23, commit 1b3b4a1a2deb ("NFS: Fix a write request leak in nfs_invalidate_page()"). Kludge in __delete_from_page_cache() in v2.6.24, commit 3a6927906f1b ("Do dirty page accounting when removing a page from the page cache"). Since v2.6.25 all of them are redundant. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Konstantin Khlebnikov Cc: Tejun Heo Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ include/linux/page-flags.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cccbbba12b9d..6571dd78e984 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1294,9 +1294,11 @@ int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_dirtied(struct page *page, struct address_space *mapping); +void account_page_cleaned(struct page *page, struct address_space *mapping); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); + int get_cmdline(struct task_struct *task, char *buffer, int buflen); /* Is the vma a continuation of the stack vma above it? */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5ed7bdaf22d5..c851ff92d5b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -328,8 +328,6 @@ static inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate) -extern void cancel_dirty_page(struct page *page, unsigned int account_size); - int test_clear_page_writeback(struct page *page); int __test_set_page_writeback(struct page *page, bool keep_write); -- cgit v1.2.3 From d1bfcdb8ce0ea6eb6034daa7ff02548e0bc9c21b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Apr 2015 15:45:30 -0700 Subject: mm: hide per-cpu lists in output of show_mem() This makes show_mem() much less verbose on huge machines. Instead of huge and almost useless dump of counters for each per-zone per-cpu lists this patch prints the sum of these counters for each zone (free_pcp) and size of per-cpu list for current cpu (local_pcp). The filter flag SHOW_MEM_PERCPU_LISTS reverts to the old verbose mode. [akpm@linux-foundation.org: update show_free_areas comment] Signed-off-by: Konstantin Khlebnikov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6571dd78e984..9c21b42d07bf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1126,6 +1126,7 @@ extern void pagefault_out_of_memory(void); * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ +#define SHOW_MEM_PERCPU_LISTS (0x0002u) /* per-zone per-cpu */ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); -- cgit v1.2.3 From 761b06771adeeb734e9eebc6f70f916cb9e2f643 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 14 Apr 2015 15:45:32 -0700 Subject: mm: completely remove dumping per-cpu lists from show_mem() It seems nobody needs this. Signed-off-by: Konstantin Khlebnikov Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c21b42d07bf..6571dd78e984 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1126,7 +1126,6 @@ extern void pagefault_out_of_memory(void); * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -#define SHOW_MEM_PERCPU_LISTS (0x0002u) /* per-zone per-cpu */ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); -- cgit v1.2.3 From 5a3fbef325e872f831cf13171c7032915bb1916d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:21 -0700 Subject: mm: do not add nr_pmds into mm_struct if PMD is folded CONFIG_PGTABLE_LEVELS is now available on every architecture and we can use it to check if we need to add nr_pmds into mm_struct. Signed-off-by: Kirill A. Shutemov Tested-by: Guenter Roeck Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: "David S. Miller" Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Metcalf Cc: David Howells Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Jeff Dike Cc: Kirill A. Shutemov Cc: Koichi Yasutake Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 199a03aab8dc..590630eb59ba 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -364,7 +364,9 @@ struct mm_struct { atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ atomic_long_t nr_ptes; /* PTE page table pages */ +#if CONFIG_PGTABLE_LEVELS > 2 atomic_long_t nr_pmds; /* PMD page table pages */ +#endif int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ -- cgit v1.2.3 From 9de1626290eaa7d921413ddc83544bc3bae27283 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:42 -0700 Subject: cleancache: zap uuid arg of cleancache_init_shared_fs Use super_block->s_uuid instead. Every shared filesystem using cleancache must now initialize super_block->s_uuid before calling cleancache_init_shared_fs. The only one on the tree, ocfs2, already meets this requirement. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 4ce9056b31a8..29657d1c83fb 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -36,7 +36,7 @@ struct cleancache_ops { extern struct cleancache_ops * cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); -extern void __cleancache_init_shared_fs(char *, struct super_block *); +extern void __cleancache_init_shared_fs(struct super_block *); extern int __cleancache_get_page(struct page *); extern void __cleancache_put_page(struct page *); extern void __cleancache_invalidate_page(struct address_space *, struct page *); @@ -78,10 +78,10 @@ static inline void cleancache_init_fs(struct super_block *sb) __cleancache_init_fs(sb); } -static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb) +static inline void cleancache_init_shared_fs(struct super_block *sb) { if (cleancache_enabled) - __cleancache_init_shared_fs(uuid, sb); + __cleancache_init_shared_fs(sb); } static inline int cleancache_get_page(struct page *page) -- cgit v1.2.3 From 53d85c98566535d7bc69470f008d7dcb09a0fec9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:45 -0700 Subject: cleancache: forbid overriding cleancache_ops Currently, cleancache_register_ops returns the previous value of cleancache_ops to allow chaining. However, chaining, as it is implemented now, is extremely dangerous due to possible pool id collisions. Suppose, a new cleancache driver is registered after the previous one assigned an id to a super block. If the new driver assigns the same id to another super block, which is perfectly possible, we will have two different filesystems using the same id. No matter if the new driver implements chaining or not, we are likely to get data corruption with such a configuration eventually. This patch therefore disables the ability to override cleancache_ops altogether as potentially dangerous. If there is already cleancache driver registered, all further calls to cleancache_register_ops will return EBUSY. Since no user of cleancache implements chaining, we only need to make minor changes to the code outside the cleancache core. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index 29657d1c83fb..b23611f43cfb 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -33,8 +33,7 @@ struct cleancache_ops { void (*invalidate_fs)(int); }; -extern struct cleancache_ops * - cleancache_register_ops(struct cleancache_ops *ops); +extern int cleancache_register_ops(struct cleancache_ops *ops); extern void __cleancache_init_fs(struct super_block *); extern void __cleancache_init_shared_fs(struct super_block *); extern int __cleancache_get_page(struct page *); -- cgit v1.2.3 From 3cb29d11174f29b76addcba4374884b14f8ea4b1 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 14 Apr 2015 15:46:48 -0700 Subject: cleancache: remove limit on the number of cleancache enabled filesystems The limit equals 32 and is imposed by the number of entries in the fs_poolid_map and shared_fs_poolid_map. Nowadays it is insufficient, because with containers on board a Linux host can have hundreds of active fs mounts. These maps were introduced by commit 49a9ab815acb8 ("mm: cleancache: lazy initialization to allow tmem backends to build/run as modules") in order to allow compiling cleancache drivers as modules. Real pool ids are stored in these maps while super_block->cleancache_poolid points to an entry in the map, so that on cleancache registration we can walk over all (if there are <= 32 of them, of course) cleancache-enabled super blocks and assign real pool ids. Actually, there is absolutely no need in these maps, because we can iterate over all super blocks immediately using iterate_supers. This is not racy, because cleancache_init_ops is called from mount_fs with super_block->s_umount held for writing, while iterate_supers takes this semaphore for reading, so if we call iterate_supers after setting cleancache_ops, all super blocks that had been created before cleancache_register_ops was called will be assigned pool ids by the action function of iterate_supers while all newer super blocks will receive it in cleancache_init_fs. This patch therefore removes the maps and hence the artificial limit on the number of cleancache enabled filesystems. Signed-off-by: Vladimir Davydov Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Mark Fasheh Cc: Joel Becker Cc: Stefan Hengelein Cc: Florian Schmaus Cc: Andor Daam Cc: Dan Magenheimer Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cleancache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index b23611f43cfb..bda5ec0b4b4d 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -5,6 +5,10 @@ #include #include +#define CLEANCACHE_NO_POOL -1 +#define CLEANCACHE_NO_BACKEND -2 +#define CLEANCACHE_NO_BACKEND_SHARED -3 + #define CLEANCACHE_KEY_MAX 6 /* -- cgit v1.2.3 From 4167e9b2cf10f8a4bcda0c713ddc8bb0a18e8187 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:46:55 -0700 Subject: mm: remove GFP_THISNODE NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE. GFP_THISNODE is a secret combination of gfp bits that have different behavior than expected. It is a combination of __GFP_THISNODE, __GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page allocator slowpath to fail without trying reclaim even though it may be used in combination with __GFP_WAIT. An example of the problem this creates: commit e97ca8e5b864 ("mm: fix GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE that really just wanted __GFP_THISNODE. The problem doesn't end there, however, because even it was a no-op for alloc_misplaced_dst_page(), which also sets __GFP_NORETRY and __GFP_NOWARN, and migrate_misplaced_transhuge_page(), where __GFP_NORETRY and __GFP_NOWAIT is set in GFP_TRANSHUGE. Converting GFP_THISNODE to __GFP_THISNODE is a no-op in these cases since the page allocator special-cases __GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN. It's time to just remove GFP_THISNODE entirely. We leave __GFP_THISNODE to restrict an allocation to a local node, but remove GFP_THISNODE and its obscurity. Instead, we require that a caller clear __GFP_WAIT if it wants to avoid reclaim. This allows the aforementioned functions to actually reclaim as they should. It also enables any future callers that want to do __GFP_THISNODE but also __GFP_NORETRY && __GFP_NOWARN to reclaim. The rule is simple: if you don't want to reclaim, then don't set __GFP_WAIT. Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so it is unchanged. Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Christoph Lameter Acked-by: Pekka Enberg Cc: Joonsoo Kim Acked-by: Johannes Weiner Cc: Mel Gorman Cc: Pravin Shelar Cc: Jarno Rajahalme Cc: Li Zefan Cc: Greg Thelen Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 51bd1e72a917..4423a0f8eabe 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -117,16 +117,6 @@ struct vm_area_struct; __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ __GFP_NO_KSWAPD) -/* - * GFP_THISNODE does not perform any reclaim, you most likely want to - * use __GFP_THISNODE to allocate from a given node without fallback! - */ -#ifdef CONFIG_NUMA -#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) -#else -#define GFP_THISNODE ((__force gfp_t)0) -#endif - /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) -- cgit v1.2.3 From ac173824959adeb489f9fcf88858774c4535a241 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 14 Apr 2015 15:47:04 -0700 Subject: mm: cma: constify and use correct signness in mm/cma.c Constify function parameters and use correct signness where needed. Signed-off-by: Sasha Levin Cc: Michal Nazarewicz Cc: Marek Szyprowski Cc: Joonsoo Kim Cc: Laurent Pinchart Acked-by: Gregory Fong Cc: Pintu Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 9384ba66e975..f7ef093ec49a 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -16,16 +16,16 @@ struct cma; extern unsigned long totalcma_pages; -extern phys_addr_t cma_get_base(struct cma *cma); -extern unsigned long cma_get_size(struct cma *cma); +extern phys_addr_t cma_get_base(const struct cma *cma); +extern unsigned long cma_get_size(const struct cma *cma); extern int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma); -extern int cma_init_reserved_mem(phys_addr_t base, - phys_addr_t size, int order_per_bit, +extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, + unsigned int order_per_bit, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); -extern bool cma_release(struct cma *cma, struct page *pages, int count); +extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align); +extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); #endif -- cgit v1.2.3 From 647757197cd34fae041e21af39ded00f5c346fc4 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 14 Apr 2015 15:47:07 -0700 Subject: mm: clarify __GFP_NOFAIL deprecation status __GFP_NOFAIL is documented as a deprecated flag since commit 478352e789f5 ("mm: add comment about deprecation of __GFP_NOFAIL"). This has discouraged people from using it but in some cases an opencoded endless loop around allocator has been used instead. So the allocator is not aware of the de facto __GFP_NOFAIL allocation because this information was not communicated properly. Let's make clear that if the allocation context really cannot afford failure because there is no good failure policy then using __GFP_NOFAIL is preferable to opencoding the loop outside of the allocator. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Acked-by: David Rientjes Cc: Johannes Weiner Cc: Dave Chinner Cc: "Theodore Ts'o" Cc: Mel Gorman Cc: Tetsuo Handa Cc: "David S. Miller" Cc: Vipul Pandya Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4423a0f8eabe..97a9373e61e8 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -57,8 +57,10 @@ struct vm_area_struct; * _might_ fail. This depends upon the particular VM implementation. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. This modifier is deprecated and no new - * users should be added. + * cannot handle allocation failures. New users should be evaluated carefully + * (and the flag should be used only when there is no reasonable failure policy) + * but it is definitely preferable to use the flag rather than opencode endless + * loop around allocator. * * __GFP_NORETRY: The VM implementation must not retry indefinitely. * -- cgit v1.2.3 From 0ddab1d2ed664c85c95488eef569786a84aedf37 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 14 Apr 2015 15:47:20 -0700 Subject: lib/ioremap.c: add huge I/O map capability interfaces Add ioremap_pud_enabled() and ioremap_pmd_enabled(), which return 1 when I/O mappings with pud/pmd are enabled on the kernel. ioremap_huge_init() calls arch_ioremap_pud_supported() and arch_ioremap_pmd_supported() to initialize the capabilities at boot-time. A new kernel option "nohugeiomap" is also added, so that user can disable the huge I/O map capabilities when necessary. Signed-off-by: Toshi Kani Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Dave Hansen Cc: Robert Elliott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index fa02e55e5a2e..4cc299c598e0 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -38,6 +38,14 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, } #endif +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +void __init ioremap_huge_init(void); +int arch_ioremap_pud_supported(void); +int arch_ioremap_pmd_supported(void); +#else +static inline void ioremap_huge_init(void) { } +#endif + /* * Managed iomap interface */ -- cgit v1.2.3 From 2b68f6caeac271620cd2f9362aeaed360e317df0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:00 -0700 Subject: mm: expose arch_mmap_rnd when available When an architecture fully supports randomizing the ELF load location, a per-arch mmap_rnd() function is used to find a randomized mmap base. In preparation for randomizing the location of ET_DYN binaries separately from mmap, this renames and exports these functions as arch_mmap_rnd(). Additionally introduces CONFIG_ARCH_HAS_ELF_RANDOMIZE for describing this feature on architectures that support it (which is a superset of ARCH_BINFMT_ELF_RANDOMIZE_PIE, since s390 already supports a separated ET_DYN ASLR from mmap ASLR without the ARCH_BINFMT_ELF_RANDOMIZE_PIE logic). Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elf-randomize.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/elf-randomize.h (limited to 'include/linux') diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h new file mode 100644 index 000000000000..7a4eda02d2b1 --- /dev/null +++ b/include/linux/elf-randomize.h @@ -0,0 +1,10 @@ +#ifndef _ELF_RANDOMIZE_H +#define _ELF_RANDOMIZE_H + +#ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE +static inline unsigned long arch_mmap_rnd(void) { return 0; } +#else +extern unsigned long arch_mmap_rnd(void); +#endif + +#endif -- cgit v1.2.3 From 204db6ed17743000691d930368a5abd6ea541c58 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Apr 2015 15:48:12 -0700 Subject: mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE The arch_randomize_brk() function is used on several architectures, even those that don't support ET_DYN ASLR. To avoid bulky extern/#define tricks, consolidate the support under CONFIG_ARCH_HAS_ELF_RANDOMIZE for the architectures that support it, while still handling CONFIG_COMPAT_BRK. Signed-off-by: Kees Cook Cc: Hector Marco-Gisbert Cc: Russell King Reviewed-by: Ingo Molnar Cc: Catalin Marinas Cc: Will Deacon Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Alexander Viro Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: "David A. Long" Cc: Andrey Ryabinin Cc: Arun Chandran Cc: Yann Droneaud Cc: Min-Hua Chen Cc: Paul Burton Cc: Alex Smith Cc: Markos Chandras Cc: Vineeth Vijayan Cc: Jeff Bailey Cc: Michael Holzheu Cc: Ben Hutchings Cc: Behan Webster Cc: Ismael Ripoll Cc: Jan-Simon Mller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elf-randomize.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf-randomize.h b/include/linux/elf-randomize.h index 7a4eda02d2b1..b5f0bda9472e 100644 --- a/include/linux/elf-randomize.h +++ b/include/linux/elf-randomize.h @@ -1,10 +1,22 @@ #ifndef _ELF_RANDOMIZE_H #define _ELF_RANDOMIZE_H +struct mm_struct; + #ifndef CONFIG_ARCH_HAS_ELF_RANDOMIZE static inline unsigned long arch_mmap_rnd(void) { return 0; } +# if defined(arch_randomize_brk) && defined(CONFIG_COMPAT_BRK) +# define compat_brk_randomized +# endif +# ifndef arch_randomize_brk +# define arch_randomize_brk(mm) (mm->brk) +# endif #else extern unsigned long arch_mmap_rnd(void); +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +# ifdef CONFIG_COMPAT_BRK +# define compat_brk_randomized +# endif #endif #endif -- cgit v1.2.3 From 2a8e70026435ad97570a1e0a0c4c941e0f700a3e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 14 Apr 2015 15:48:15 -0700 Subject: mm: numa: remove migrate_ratelimited This code is dead since commit 9e645ab6d089 ("sched/numa: Continue PTE scanning even if migrate rate limited") so remove it. Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 78baed5f2952..cac1c0904d5f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -69,7 +69,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, extern bool pmd_trans_migrating(pmd_t pmd); extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); -extern bool migrate_ratelimited(int node); #else static inline bool pmd_trans_migrating(pmd_t pmd) { @@ -80,10 +79,6 @@ static inline int migrate_misplaced_page(struct page *page, { return -EAGAIN; /* can't migrate now */ } -static inline bool migrate_ratelimited(int node) -{ - return false; -} #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE) -- cgit v1.2.3 From 2415b9f5cb048a803b30b790af994ba71ff0bd4c Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Tue, 14 Apr 2015 15:48:18 -0700 Subject: memcg: print cgroup information when system panics due to panic_on_oom If kernel panics due to oom, caused by a cgroup reaching its limit, when 'compulsory panic_on_oom' is enabled, then we will only see that the OOM happened because of "compulsory panic_on_oom is enabled" but this doesn't tell the difference between mempolicy and memcg. And dumping system wide information is plain wrong and more confusing. This patch provides the information of the cgroup whose limit triggerred panic Signed-off-by: Balasubramani Vivekanandan Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index d5771bed59c9..44b2f6f7bbd8 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -66,7 +66,8 @@ extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask); + int order, const nodemask_t *nodemask, + struct mem_cgroup *memcg); extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, -- cgit v1.2.3 From 11d83360452ea2a95e699da01f8e1bcc4676a5de Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 14 Apr 2015 15:48:21 -0700 Subject: mm, mempool: do not allow atomic resizing Allocating a large number of elements in atomic context could quickly deplete memory reserves, so just disallow atomic resizing entirely. Nothing currently uses mempool_resize() with anything other than GFP_KERNEL, so convert existing callers to drop the gfp_mask. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: David Rientjes Acked-by: Steffen Maier [zfcp] Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Steve French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 39ed62ab5b8a..b19b3023c880 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -29,7 +29,7 @@ extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid); -extern int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask); +extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); extern void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask); extern void mempool_free(void *element, mempool_t *pool); -- cgit v1.2.3 From 4a20799d11f64e6b8725cacc7619b1ae1dbf9acd Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 14 Apr 2015 15:48:27 -0700 Subject: mm: move memtest under mm Memtest is a simple feature which fills the memory with a given set of patterns and validates memory contents, if bad memory regions is detected it reserves them via memblock API. Since memblock API is widely used by other architectures this feature can be enabled outside of x86 world. This patch set promotes memtest to live under generic mm umbrella and enables memtest feature for arm/arm64. It was reported that this patch set was useful for tracking down an issue with some errant DMA on an arm64 platform. This patch (of 6): There is nothing platform dependent in the core memtest code, so other platforms might benefit from this feature too. [linux@roeck-us.net: MEMTEST depends on MEMBLOCK] Signed-off-by: Vladimir Murzin Acked-by: Will Deacon Tested-by: Mark Rutland Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Catalin Marinas Cc: Russell King Cc: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e8cc45307f8f..6724cb020f5e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -365,6 +365,14 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #define __initdata_memblock #endif +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); +#else +static inline void early_memtest(unsigned long start, unsigned long end) +{ +} +#endif + #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { -- cgit v1.2.3 From 7f70baeeb9e2d2b2a37a4bd3727d709547c4ae41 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 14 Apr 2015 15:48:30 -0700 Subject: memtest: use phys_addr_t for physical addresses Since memtest might be used by other architectures pass input parameters as phys_addr_t instead of long to prevent overflow. Signed-off-by: Vladimir Murzin Acked-by: Will Deacon Tested-by: Mark Rutland Cc: "H. Peter Anvin" Cc: Catalin Marinas Cc: Ingo Molnar Cc: Russell King Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6724cb020f5e..9497ec7c77ea 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -366,9 +366,9 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo #endif #ifdef CONFIG_MEMTEST -extern void early_memtest(unsigned long start, unsigned long end); +extern void early_memtest(phys_addr_t start, phys_addr_t end); #else -static inline void early_memtest(unsigned long start, unsigned long end) +static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } #endif -- cgit v1.2.3 From d616a703a52cf972425cddd43fc01cd4ef867faf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 5 Apr 2015 16:59:25 +0200 Subject: of: Add dummy of_irq_to_resource_table() for IRQ_OF=n If CONFIG_IRQ_OF=n: drivers/built-in.o: In function `of_device_alloc': (.text+0x72bce): undefined reference to `of_irq_to_resource_table' make: *** [vmlinux] Error 1 of_device_alloc() calls of_irq_to_resource_table() with nr_irqs = 0 due to of_irq_count() already being a dummy, so just add a dummy for of_irq_to_resource_table(), too. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- include/linux/of_irq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index bfec136a6d1e..d884929a7747 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -37,8 +37,6 @@ extern int of_irq_parse_one(struct device_node *device, int index, extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data); extern int of_irq_to_resource(struct device_node *dev, int index, struct resource *r); -extern int of_irq_to_resource_table(struct device_node *dev, - struct resource *res, int nr_irqs); extern void of_irq_init(const struct of_device_id *matches); @@ -46,6 +44,8 @@ extern void of_irq_init(const struct of_device_id *matches); extern int of_irq_count(struct device_node *dev); extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); +extern int of_irq_to_resource_table(struct device_node *dev, + struct resource *res, int nr_irqs); #else static inline int of_irq_count(struct device_node *dev) { @@ -59,6 +59,11 @@ static inline int of_irq_get_byname(struct device_node *dev, const char *name) { return 0; } +static inline int of_irq_to_resource_table(struct device_node *dev, + struct resource *res, int nr_irqs) +{ + return 0; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 37786c7fee40771d13901de129af7e084ed48b55 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Thu, 9 Apr 2015 13:05:14 -0700 Subject: of: Add helper function to check MMIO register endianness SoC peripherals can come in several different flavors: - little-endian: registers always need to be accessed in LE mode (so the kernel should perform a swap if the CPU is running BE) - big-endian: registers always need to be accessed in BE mode (so the kernel should perform a swap if the CPU is running LE) - native-endian: the bus will automatically swap accesses, so the kernel should never swap Introduce a function that checks an OF device node to see whether it contains a "big-endian" or "native-endian" property. For the former case, always return true. For the latter case, return true iff the kernel was built for BE (implying that the BE MMIO accessors do not perform a swap). Otherwise return false, assuming LE registers. LE registers are assumed by default because most existing drivers (libahci, serial8250, usb) always use readl/writel in the absence of instructions to the contrary, so that will be our fallback. Signed-off-by: Kevin Cernekee Reviewed-by: Peter Hurley Acked-by: Greg Kroah-Hartman Signed-off-by: Rob Herring --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index dfde07e77a63..a0cd62ef22db 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -305,6 +305,7 @@ extern int of_property_read_string_helper(struct device_node *np, extern int of_device_is_compatible(const struct device_node *device, const char *); extern bool of_device_is_available(const struct device_node *device); +extern bool of_device_is_big_endian(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); @@ -466,6 +467,11 @@ static inline bool of_device_is_available(const struct device_node *device) return false; } +static inline bool of_device_is_big_endian(const struct device_node *device) +{ + return false; +} + static inline struct property *of_find_property(const struct device_node *np, const char *name, int *lenp) -- cgit v1.2.3 From cc7837867a559feba70fdf68eb53c24a84e3712f Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Thu, 9 Apr 2015 13:05:15 -0700 Subject: of/fdt: Add endianness helper function for early init code Provide a libfdt-based equivalent for of_device_is_big_endian(), suitable for use in the early_init_* functions. Signed-off-by: Kevin Cernekee Reviewed-by: Peter Hurley Acked-by: Greg Kroah-Hartman Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 0ff360d5b3b3..587ee507965d 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -33,6 +33,8 @@ extern void *of_fdt_get_property(const void *blob, extern int of_fdt_is_compatible(const void *blob, unsigned long node, const char *compat); +extern bool of_fdt_is_big_endian(const void *blob, + unsigned long node); extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); extern void of_fdt_unflatten_tree(unsigned long *blob, -- cgit v1.2.3 From 9abbfb486f5c254805bb6a3f263bc14d989eb90b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Apr 2015 10:17:45 +0930 Subject: virtio: drop virtio_device_is_legacy_only virtio_device_is_legacy_only is now unused, drop it from core. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Signed-off-by: Rusty Russell --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 28f0e65b9a11..8f4d4bfa6d46 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -108,8 +108,6 @@ struct virtio_device { void *priv; }; -bool virtio_device_is_legacy_only(struct virtio_device_id id); - static inline struct virtio_device *dev_to_virtio(struct device *_dev) { return container_of(_dev, struct virtio_device, dev); -- cgit v1.2.3 From e6e20a7a5f3f49bfee518d5c6849107398d83912 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Tue, 10 Feb 2015 15:20:49 -0800 Subject: init: export name_to_dev_t and mark name argument as const DM will switch its device lookup code to using name_to_dev_t() so it must be exported. Also, the @name argument should be marked const. Signed-off-by: Dan Ehrenberg Signed-off-by: Mike Snitzer --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index c2c561dc0114..bca086d62b1a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -92,6 +92,6 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); -extern dev_t name_to_dev_t(char *name); +extern dev_t name_to_dev_t(const char *name); #endif /* _LINUX_MOUNT_H */ -- cgit v1.2.3 From 65a4a1cad7c56e7056fb4b35ac2d93695612612c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:11:08 -0400 Subject: nfs: generic_write_checks() shouldn't be done on swapout... Signed-off-by: Al Viro --- include/linux/nfs_fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3d1b0d2fe55e..410abd172feb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -452,8 +452,7 @@ extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); /* * linux/fs/nfs/dir.c -- cgit v1.2.3 From 525d27b23555419e0e7b73fb6e78d4d678cb4f32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Feb 2015 13:40:17 +0000 Subject: VFS: Add owner-filesystem positive/negative dentry checks Supply two functions to test whether a filesystem's own dentries are positive or negative (d_really_is_positive() and d_really_is_negative()). The problem is that the DCACHE_ENTRY_TYPE field of dentry->d_flags may be overridden by the union part of a layered filesystem and isn't thus necessarily indicative of the type of dentry. Normally, this would involve a negative dentry (ie. ->d_inode == NULL) having ->d_layer.lower pointed to a lower layer dentry, DCACHE_PINNING_LOWER set and the DCACHE_ENTRY_TYPE field set to something other than DCACHE_MISS_TYPE - but it could also involve, say, a DCACHE_SPECIAL_TYPE being overridden to DCACHE_WHITEOUT_TYPE if a 0,0 chardev is detected in the top layer. However, inside a filesystem, when that fs is looking at its own dentries, it probably wants to know if they are really negative or not - and doesn't care about the fallthrough bits used by the union. To this end, a filesystem should normally use d_really_is_positive/negative() when looking at its own dentries rather than d_is_positive/negative() and should use d_inode() to get at the inode. Anyone looking at someone else's dentries (this includes pathwalk) should use d_is_xxx() and d_backing_inode(). Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d8358799c594..e83768ee38fc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -482,6 +482,44 @@ static inline bool d_is_positive(const struct dentry *dentry) return !d_is_negative(dentry); } +/** + * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents either an absent name or a name that + * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent + * a true miss, a whiteout that isn't represented by a 0,0 chardev or a + * fallthrough marker in an opaque directory. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. (3) The dentry may have something attached to ->d_lower and the + * type field of the flags may be set to something other than miss or whiteout. + */ +static inline bool d_really_is_negative(const struct dentry *dentry) +{ + return dentry->d_inode == NULL; +} + +/** + * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents a name that maps to an inode + * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if + * that is represented on medium as a 0,0 chardev. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. + */ +static inline bool d_really_is_positive(const struct dentry *dentry) +{ + return dentry->d_inode != NULL; +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) -- cgit v1.2.3 From 4bf46a272647d89e780126b52eda04737defd9f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 5 Mar 2015 14:09:22 +0000 Subject: VFS: Impose ordering on accesses of d_inode and d_flags Impose ordering on accesses of d_inode and d_flags to avoid the need to do this: if (!dentry->d_inode || d_is_negative(dentry)) { when this: if (d_is_negative(dentry)) { should suffice. This check is especially problematic if a dentry can have its type field set to something other than DENTRY_MISS_TYPE when d_inode is NULL (as in unionmount). What we really need to do is stick a write barrier between setting d_inode and setting d_flags and a read barrier between reading d_flags and reading d_inode. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e83768ee38fc..df334cbacc6d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -404,26 +404,11 @@ static inline bool d_mountpoint(const struct dentry *dentry) /* * Directory cache entry type accessor functions. */ -static inline void __d_set_type(struct dentry *dentry, unsigned type) -{ - dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type; -} - -static inline void __d_clear_type(struct dentry *dentry) -{ - __d_set_type(dentry, DCACHE_MISS_TYPE); -} - -static inline void d_set_type(struct dentry *dentry, unsigned type) -{ - spin_lock(&dentry->d_lock); - __d_set_type(dentry, type); - spin_unlock(&dentry->d_lock); -} - static inline unsigned __d_entry_type(const struct dentry *dentry) { - return dentry->d_flags & DCACHE_ENTRY_TYPE; + unsigned type = READ_ONCE(dentry->d_flags); + smp_rmb(); + return type & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) -- cgit v1.2.3 From 773af94e4e3984d4055c332602de5d0d2ee3d840 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 3 Mar 2015 10:54:48 +0200 Subject: net/mlx4_core: Manage alias GUID per VF Manages alias GUIDs per VF per port in the core layer. This is a pre-step for managing alias GUIDs in a mode that the admin GUID is returned via ib_query_gid() regardless of whether the SM has approved it or not. Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f9ce34bec45b..39a91b0c5d5c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1345,6 +1345,9 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, + int port); +__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port); int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); -- cgit v1.2.3 From fb517a4f03041c5eaed394bd57ee518b44301f1a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 3 Mar 2015 11:23:32 +0200 Subject: net/mlx4_core: Set initial admin GUIDs for VFs To have out of the box experience, the PF generates random GUIDs who serve as the initial admin values. Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 39a91b0c5d5c..83e80ab94500 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1348,6 +1348,7 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port); __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port); +void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port); int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); -- cgit v1.2.3 From f2b91d8d385d2cef3a1e3b3846f2dde4a6720c43 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Wed, 15 Apr 2015 16:12:51 -0700 Subject: vfs: delete vfs_readdir function declaration vfs_readdir() was replaced by iterate_dir() in commit 5c0ba4e0762e ("[readdir] introduce iterate_dir() and dir_context"). Signed-off-by: Zhang Zhen Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d502e5436c84..60733bdc74b4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2684,7 +2684,6 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); -extern int vfs_readdir(struct file *, filldir_t, void *); extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_stat(const char __user *, struct kstat *); -- cgit v1.2.3 From d7e4a2ea51c1b0ac0b2d53f5ab4a2e878b1c4aec Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Wed, 15 Apr 2015 16:12:57 -0700 Subject: mm: refactor zone_movable_is_highmem() All callers of zone_movable_is_highmem are under #ifdef CONFIG_HIGHMEM, so the else branch return 0 is not needed. Signed-off-by: Zhang Zhen Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2782df47101e..54d74f6eb233 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -842,16 +842,16 @@ static inline int populated_zone(struct zone *zone) extern int movable_zone; +#ifdef CONFIG_HIGHMEM static inline int zone_movable_is_highmem(void) { -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP return movable_zone == ZONE_HIGHMEM; -#elif defined(CONFIG_HIGHMEM) - return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; #else - return 0; + return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM; #endif } +#endif static inline int is_highmem_idx(enum zone_type idx) { -- cgit v1.2.3 From e8c6158fef15a1532bd5242a0cd88565eedabe61 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Apr 2015 16:13:08 -0700 Subject: mm: consolidate all page-flags helpers in Currently we take a naive approach to page flags on compound pages - we set the flag on the page without consideration if the flag makes sense for tail page or for compound page in general. This patchset try to sort this out by defining per-flag policy on what need to be done if page-flag helper operate on compound page. The last patch in the patchset also sanitizes usege of page->mapping for tail pages. We don't define the meaning of page->mapping for tail pages. Currently it's always NULL, which can be inconsistent with head page and potentially lead to problems. For now I caught one case of illegal usage of page flags or ->mapping: sound subsystem allocates pages with __GFP_COMP and maps them with PTEs. It leads to setting dirty bit on tail pages and access to tail_page's ->mapping. I don't see any bad behaviour caused by this, but worth fixing anyway. This patchset makes more sense if you take my THP refcounting into account: we will see more compound pages mapped with PTEs and we need to define behaviour of flags on compound pages to avoid bugs. This patch (of 16): We have page-flags helper function declarations/definitions spread over several header files. Let's consolidate them in . Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Acked-by: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Vlastimil Babka Cc: Christoph Lameter Cc: Naoya Horiguchi Cc: Steve Capper Cc: "Aneesh Kumar K.V" Cc: Johannes Weiner Cc: Michal Hocko Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 7 ---- include/linux/ksm.h | 17 -------- include/linux/mm.h | 81 -------------------------------------- include/linux/page-flags.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7b5785032049..1a782733a420 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -41,8 +41,6 @@ extern int hugetlb_max_hstate __read_mostly; struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); void hugepage_put_subpool(struct hugepage_subpool *spool); -int PageHuge(struct page *page); - void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -109,11 +107,6 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, #else /* !CONFIG_HUGETLB_PAGE */ -static inline int PageHuge(struct page *page) -{ - return 0; -} - static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 3be6bb18562d..7ae216a39c9e 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -35,18 +35,6 @@ static inline void ksm_exit(struct mm_struct *mm) __ksm_exit(mm); } -/* - * A KSM page is one of those write-protected "shared pages" or "merged pages" - * which KSM maps into multiple mms, wherever identical anonymous page content - * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any - * anon_vma, but to that page's node of the stable tree. - */ -static inline int PageKsm(struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == - (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); -} - static inline struct stable_node *page_stable_node(struct page *page) { return PageKsm(page) ? page_rmapping(page) : NULL; @@ -87,11 +75,6 @@ static inline void ksm_exit(struct mm_struct *mm) { } -static inline int PageKsm(struct page *page) -{ - return 0; -} - #ifdef CONFIG_MMU static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6571dd78e984..fb1fc38b01ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -494,15 +494,6 @@ static inline int page_count(struct page *page) return atomic_read(&compound_head(page)->_count); } -#ifdef CONFIG_HUGETLB_PAGE -extern int PageHeadHuge(struct page *page_head); -#else /* CONFIG_HUGETLB_PAGE */ -static inline int PageHeadHuge(struct page *page_head) -{ - return 0; -} -#endif /* CONFIG_HUGETLB_PAGE */ - static inline bool __compound_tail_refcounted(struct page *page) { return !PageSlab(page) && !PageHeadHuge(page); @@ -571,53 +562,6 @@ static inline void init_page_count(struct page *page) atomic_set(&page->_count, 1); } -/* - * PageBuddy() indicate that the page is free and in the buddy system - * (see mm/page_alloc.c). - * - * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to - * -2 so that an underflow of the page_mapcount() won't be mistaken - * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very - * efficiently by most CPU architectures. - */ -#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) - -static inline int PageBuddy(struct page *page) -{ - return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; -} - -static inline void __SetPageBuddy(struct page *page) -{ - VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); - atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); -} - -static inline void __ClearPageBuddy(struct page *page) -{ - VM_BUG_ON_PAGE(!PageBuddy(page), page); - atomic_set(&page->_mapcount, -1); -} - -#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) - -static inline int PageBalloon(struct page *page) -{ - return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE; -} - -static inline void __SetPageBalloon(struct page *page) -{ - VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); - atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE); -} - -static inline void __ClearPageBalloon(struct page *page) -{ - VM_BUG_ON_PAGE(!PageBalloon(page), page); - atomic_set(&page->_mapcount, -1); -} - void put_page(struct page *page); void put_pages_list(struct list_head *pages); @@ -1006,26 +950,6 @@ void page_address_init(void); #define page_address_init() do { } while(0) #endif -/* - * On an anonymous page mapped into a user virtual memory area, - * page->mapping points to its anon_vma, not to a struct address_space; - * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. - * - * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, - * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; - * and then page->mapping points, not to an anon_vma, but to a private - * structure which KSM associates with that merged page. See ksm.h. - * - * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. - * - * Please note that, confusingly, "page_mapping" refers to the inode - * address_space which maps the page from disk; whereas "page_mapped" - * refers to user virtual address space into which the page is mapped. - */ -#define PAGE_MAPPING_ANON 1 -#define PAGE_MAPPING_KSM 2 -#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) - extern struct address_space *page_mapping(struct page *page); /* Neutral page->mapping pointer to address_space or anon_vma or other */ @@ -1045,11 +969,6 @@ struct address_space *page_file_mapping(struct page *page) return page->mapping; } -static inline int PageAnon(struct page *page) -{ - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; -} - /* * Return the pagecache index of the passed page. Regular pagecache pages * use ->index whereas swapcache pages use ->private diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c851ff92d5b3..84d10b65cec6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -289,6 +289,47 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +/* + * On an anonymous page mapped into a user virtual memory area, + * page->mapping points to its anon_vma, not to a struct address_space; + * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. + * + * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, + * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; + * and then page->mapping points, not to an anon_vma, but to a private + * structure which KSM associates with that merged page. See ksm.h. + * + * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. + * + * Please note that, confusingly, "page_mapping" refers to the inode + * address_space which maps the page from disk; whereas "page_mapped" + * refers to user virtual address space into which the page is mapped. + */ +#define PAGE_MAPPING_ANON 1 +#define PAGE_MAPPING_KSM 2 +#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) + +static inline int PageAnon(struct page *page) +{ + return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; +} + +#ifdef CONFIG_KSM +/* + * A KSM page is one of those write-protected "shared pages" or "merged pages" + * which KSM maps into multiple mms, wherever identical anonymous page content + * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any + * anon_vma, but to that page's node of the stable tree. + */ +static inline int PageKsm(struct page *page) +{ + return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); +} +#else +TESTPAGEFLAG_FALSE(Ksm) +#endif + u64 stable_page_flags(struct page *page); static inline int PageUptodate(struct page *page) @@ -426,6 +467,14 @@ static inline void ClearPageCompound(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ +#ifdef CONFIG_HUGETLB_PAGE +int PageHuge(struct page *page); +int PageHeadHuge(struct page *page); +#else +TESTPAGEFLAG_FALSE(Huge) +TESTPAGEFLAG_FALSE(HeadHuge) +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -479,6 +528,53 @@ static inline int PageTransTail(struct page *page) } #endif +/* + * PageBuddy() indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + * + * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to + * -2 so that an underflow of the page_mapcount() won't be mistaken + * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very + * efficiently by most CPU architectures. + */ +#define PAGE_BUDDY_MAPCOUNT_VALUE (-128) + +static inline int PageBuddy(struct page *page) +{ + return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE; +} + +static inline void __SetPageBuddy(struct page *page) +{ + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); + atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); +} + +static inline void __ClearPageBuddy(struct page *page) +{ + VM_BUG_ON_PAGE(!PageBuddy(page), page); + atomic_set(&page->_mapcount, -1); +} + +#define PAGE_BALLOON_MAPCOUNT_VALUE (-256) + +static inline int PageBalloon(struct page *page) +{ + return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE; +} + +static inline void __SetPageBalloon(struct page *page) +{ + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); + atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE); +} + +static inline void __ClearPageBalloon(struct page *page) +{ + VM_BUG_ON_PAGE(!PageBalloon(page), page); + atomic_set(&page->_mapcount, -1); +} + /* * If network-based swap is enabled, sl*b must keep track of whether pages * were allocated from pfmemalloc reserves. -- cgit v1.2.3 From 8d63d99a5dfbdb997d12dd3c07b2070ca723db3b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Apr 2015 16:13:12 -0700 Subject: mm: avoid tail page refcounting on non-THP compound pages THP uses tail page refcounting to be able to split huge pages at any time. Tail page refcounting is not needed for other users of compound pages and it's harmful because of overhead. We try to exclude non-THP pages from tail page refcounting using __compound_tail_refcounted() check. It excludes most common non-THP compound pages: SL*B and hugetlb, but it doesn't catch rest of __GFP_COMP users -- drivers. And it's not only about overhead. Drivers might want to use compound pages to get refcounting semantics suitable for mapping high-order pages to userspace. But tail page refcounting breaks it. Tail page refcounting uses ->_mapcount in tail pages to store GUP pins on them. It means GUP pins would affect page_mapcount() for tail pages. It's not a problem for THP, because it never maps tail pages. But unlike THP, drivers map parts of compound pages with PTEs and it makes page_mapcount() be called for tail pages. In particular, GUP pins would shift PSS up and affect /proc/kpagecount for such pages. But, I'm not aware about anything which can lead to crash or other serious misbehaviour. Since currently all THP pages are anonymous and all drivers pages are not, we can fix the __compound_tail_refcounted() check by requiring PageAnon() to enable tail page refcounting. Signed-off-by: Kirill A. Shutemov Acked-by: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fb1fc38b01ce..515ec5045b60 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -496,7 +496,7 @@ static inline int page_count(struct page *page) static inline bool __compound_tail_refcounted(struct page *page) { - return !PageSlab(page) && !PageHeadHuge(page); + return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page); } /* -- cgit v1.2.3 From 5bbe3547aa3ba5242366a322a28996872301b703 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Wed, 15 Apr 2015 16:13:20 -0700 Subject: mm: allow compaction of unevictable pages Currently, pages which are marked as unevictable are protected from compaction, but not from other types of migration. The POSIX real time extension explicitly states that mlock() will prevent a major page fault, but the spirit of this is that mlock() should give a process the ability to control sources of latency, including minor page faults. However, the mlock manpage only explicitly says that a locked page will not be written to swap and this can cause some confusion. The compaction code today does not give a developer who wants to avoid swap but wants to have large contiguous areas available any method to achieve this state. This patch introduces a sysctl for controlling compaction behavior with respect to the unevictable lru. Users who demand no page faults after a page is present can set compact_unevictable_allowed to 0 and users who need the large contiguous areas can enable compaction on locked memory by leaving the default value of 1. To illustrate this problem I wrote a quick test program that mmaps a large number of 1MB files filled with random data. These maps are created locked and read only. Then every other mmap is unmapped and I attempt to allocate huge pages to the static huge page pool. When the compact_unevictable_allowed sysctl is 0, I cannot allocate hugepages after fragmenting memory. When the value is set to 1, allocations succeed. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Rik van Riel Cc: Vlastimil Babka Cc: Thomas Gleixner Cc: Christoph Lameter Cc: Peter Zijlstra Cc: Mel Gorman Cc: David Rientjes Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index a014559e4a49..aa8f61cf3a19 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -34,6 +34,7 @@ extern int sysctl_compaction_handler(struct ctl_table *table, int write, extern int sysctl_extfrag_threshold; extern int sysctl_extfrag_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); +extern int sysctl_compact_unevictable_allowed; extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, -- cgit v1.2.3 From cc5993bd7b8cff4a3e37042ee1358d1d5eafa70c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Apr 2015 16:13:26 -0700 Subject: mm: rename deactivate_page to deactivate_file_page "deactivate_page" was created for file invalidation so it has too specific logic for file-backed pages. So, let's change the name of the function and date to a file-specific one and yield the generic name. Signed-off-by: Minchan Kim Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Rik van Riel Cc: Shaohua Li Cc: Wang, Yalin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7067eca501e2..cee108cbe2d5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -307,7 +307,7 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); -extern void deactivate_page(struct page *page); +extern void deactivate_file_page(struct page *page); extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); -- cgit v1.2.3 From c6a918200c4f4ebf74b7e1ae4fea9115c7b297f8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 15 Apr 2015 16:13:36 -0700 Subject: hugetlbfs: add minimum size tracking fields to subpool structure hugetlbfs allocates huge pages from the global pool as needed. Even if the global pool contains a sufficient number pages for the filesystem size at mount time, those global pages could be grabbed for some other use. As a result, filesystem huge page allocations may fail due to lack of pages. Applications such as a database want to use huge pages for performance reasons. hugetlbfs filesystem semantics with ownership and modes work well to manage access to a pool of huge pages. However, the application would like some reasonable assurance that allocations will not fail due to a lack of huge pages. At application startup time, the application would like to configure itself to use a specific number of huge pages. Before starting, the application can check to make sure that enough huge pages exist in the system global pools. However, there are no guarantees that those pages will be available when needed by the application. What the application wants is exclusive use of a subset of huge pages. Add a new hugetlbfs mount option 'min_size=' to indicate that the specified number of pages will be available for use by the filesystem. At mount time, this number of huge pages will be reserved for exclusive use of the filesystem. If there is not a sufficient number of free pages, the mount will fail. As pages are allocated to and freeed from the filesystem, the number of reserved pages is adjusted so that the specified minimum is maintained. This patch (of 4): Add a field to the subpool structure to indicate the minimimum number of huge pages to always be used by this subpool. This minimum count includes allocated pages as well as reserved pages. If the minimum number of pages for the subpool have not been allocated, pages are reserved up to this minimum. An additional field (rsv_hpages) is used to track the number of pages reserved to meet this minimum size. The hstate pointer in the subpool is convenient to have when reserving and unreserving the pages. Signed-off-by: Mike Kravetz Cc: Davidlohr Bueso Cc: Aneesh Kumar Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1a782733a420..d1a77b87408d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -22,7 +22,13 @@ struct mmu_gather; struct hugepage_subpool { spinlock_t lock; long count; - long max_hpages, used_hpages; + long max_hpages; /* Maximum huge pages or -1 if no maximum. */ + long used_hpages; /* Used count against maximum, includes */ + /* both alloced and reserved pages. */ + struct hstate *hstate; + long min_hpages; /* Minimum huge pages or -1 if no minimum. */ + long rsv_hpages; /* Pages reserved against global pool to */ + /* sasitfy minimum size. */ }; struct resv_map { -- cgit v1.2.3 From 7ca02d0ae586fe7df59632966a64f3f1a756ef05 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 15 Apr 2015 16:13:42 -0700 Subject: hugetlbfs: accept subpool min_size mount option and setup accordingly Make 'min_size=' be an option when mounting a hugetlbfs. This option takes the same value as the 'size' option. min_size can be specified without specifying size. If both are specified, min_size must be less that or equal to size else the mount will fail. If min_size is specified, then at mount time an attempt is made to reserve min_size pages. If the reservation fails, the mount fails. At umount time, the reserved pages are released. Signed-off-by: Mike Kravetz Cc: Davidlohr Bueso Cc: Aneesh Kumar Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d1a77b87408d..5713c49a4a5c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -44,7 +44,8 @@ extern int hugetlb_max_hstate __read_mostly; #define for_each_hstate(h) \ for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) -struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); +struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, + long min_hpages); void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -- cgit v1.2.3 From e244c9e66f6197f55f6fbb2d5e70714e262cc595 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 15 Apr 2015 16:14:14 -0700 Subject: mm, mempool: disallow mempools based on slab caches with constructors All occurrences of mempools based on slab caches with object constructors have been removed from the tree, so disallow creating them. We can only dereference mem->ctor in mm/mempool.c without including mm/slab.h in include/linux/mempool.h. So simply note the restriction, just like the comment restricting usage of __GFP_ZERO, and warn on kernels with CONFIG_DEBUG_VM() if such a mempool is allocated from. We don't want to incur this check on every element allocation, so use VM_BUG_ON(). Signed-off-by: David Rientjes Cc: Dave Kleikamp Cc: Christoph Hellwig Cc: Sebastian Ott Cc: Mikulas Patocka Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index b19b3023c880..69b6951e8fd2 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -36,7 +36,8 @@ extern void mempool_free(void *element, mempool_t *pool); /* * A mempool_alloc_t and mempool_free_t that get the memory from - * a slab that is passed in through pool_data. + * a slab cache that is passed in through pool_data. + * Note: the slab cache may not have a ctor function. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); -- cgit v1.2.3 From 7e1f049efb86bd86c06b80eeac0ef80cdeb8c0e7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 15 Apr 2015 16:14:41 -0700 Subject: mm: hugetlb: cleanup using paeg_huge_active() Now we have an easy access to hugepages' activeness, so existing helpers to get the information can be cleaned up. [akpm@linux-foundation.org: s/PageHugeActive/page_huge_active/] Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Reviewed-by: Michal Hocko Cc: Mel Gorman Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 -- include/linux/page-flags.h | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5713c49a4a5c..205026175c42 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -84,7 +84,6 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); -bool is_hugepage_active(struct page *page); void free_huge_page(struct page *page); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE @@ -152,7 +151,6 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list) return false; } #define putback_active_hugepage(p) do {} while (0) -#define is_hugepage_active(x) false static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 84d10b65cec6..f34e040b34e9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -470,11 +470,18 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); +bool page_huge_active(struct page *page); #else TESTPAGEFLAG_FALSE(Huge) TESTPAGEFLAG_FALSE(HeadHuge) + +static inline bool page_huge_active(struct page *page) +{ + return 0; +} #endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for -- cgit v1.2.3 From cdd7875e0c4db5c41e28b645d3bf7d41bd2cbb45 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 Apr 2015 16:14:47 -0700 Subject: include/linux/mm.h: simplify flag check Flip the flag test so that it is the simplest. No functional change, just a small readability improvement: No code changed: # arch/x86/kernel/sys_x86_64.o: text data bss dec hex filename 1551 24 0 1575 627 sys_x86_64.o.before 1551 24 0 1575 627 sys_x86_64.o.after md5: 70708d1b1ad35cc891118a69dc1a63f9 sys_x86_64.o.before.asm 70708d1b1ad35cc891118a69dc1a63f9 sys_x86_64.o.after.asm Signed-off-by: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 515ec5045b60..68c21b2374c4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1894,10 +1894,10 @@ extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); static inline unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { - if (!(info->flags & VM_UNMAPPED_AREA_TOPDOWN)) - return unmapped_area(info); - else + if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) return unmapped_area_topdown(info); + else + return unmapped_area(info); } /* truncate.c */ -- cgit v1.2.3 From e39155ea11eac6da056b04669d7c9fc612e2065a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Apr 2015 16:14:53 -0700 Subject: mm: uninline and cleanup page-mapping related helpers Most-used page->mapping helper -- page_mapping() -- has already uninlined. Let's uninline also page_rmapping() and page_anon_vma(). It saves us depending on configuration around 400 bytes in text: text data bss dec hex filename 660318 99254 410000 1169572 11d8a4 mm/built-in.o-before 659854 99254 410000 1169108 11d6d4 mm/built-in.o I also tried to make code a bit more clean. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Kirill A. Shutemov Cc: Christoph Lameter Cc: Konstantin Khlebnikov Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++------ include/linux/rmap.h | 8 -------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 68c21b2374c4..0e7bb2194da5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -950,14 +950,10 @@ void page_address_init(void); #define page_address_init() do { } while(0) #endif +extern void *page_rmapping(struct page *page); +extern struct anon_vma *page_anon_vma(struct page *page); extern struct address_space *page_mapping(struct page *page); -/* Neutral page->mapping pointer to address_space or anon_vma or other */ -static inline void *page_rmapping(struct page *page) -{ - return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); -} - extern struct address_space *__page_file_mapping(struct page *); static inline diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c4c559a45dc8..c89c53a113a8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -105,14 +105,6 @@ static inline void put_anon_vma(struct anon_vma *anon_vma) __put_anon_vma(anon_vma); } -static inline struct anon_vma *page_anon_vma(struct page *page) -{ - if (((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != - PAGE_MAPPING_ANON) - return NULL; - return page_rmapping(page); -} - static inline void vma_lock_anon_vma(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; -- cgit v1.2.3 From 923936157b158f36bd6a3d86496dce82b1a957de Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 15 Apr 2015 16:15:05 -0700 Subject: mm/mempool.c: kasan: poison mempool elements Mempools keep allocated objects in reserved for situations when ordinary allocation may not be possible to satisfy. These objects shouldn't be accessed before they leave the pool. This patch poison elements when get into the pool and unpoison when they leave it. This will let KASan to detect use-after-free of mempool's elements. Signed-off-by: Andrey Ryabinin Tested-by: David Rientjes Cc: Catalin Marinas Cc: Dmitry Chernenkov Cc: Dmitry Vyukov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5bb074431eb0..5486d777b706 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -44,6 +44,7 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_kmalloc_large(const void *ptr, size_t size); void kasan_kfree_large(const void *ptr); +void kasan_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); void kasan_krealloc(const void *object, size_t new_size); @@ -71,6 +72,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size) {} static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size) {} static inline void kasan_krealloc(const void *object, size_t new_size) {} -- cgit v1.2.3 From dd9061846a3ba01b0fa45423aaa087e4a69187fa Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 15 Apr 2015 16:15:11 -0700 Subject: mm: new pfn_mkwrite same as page_mkwrite for VM_PFNMAP This will allow FS that uses VM_PFNMAP | VM_MIXEDMAP (no page structs) to get notified when access is a write to a read-only PFN. This can happen if we mmap() a file then first mmap-read from it to page-in a read-only PFN, than we mmap-write to the same page. We need this functionality to fix a DAX bug, where in the scenario above we fail to set ctime/mtime though we modified the file. An xfstest is attached to this patchset that shows the failure and the fix. (A DAX patch will follow) This functionality is extra important for us, because upon dirtying of a pmem page we also want to RDMA the page to a remote cluster node. We define a new pfn_mkwrite and do not reuse page_mkwrite because 1 - The name ;-) 2 - But mainly because it would take a very long and tedious audit of all page_mkwrite functions of VM_MIXEDMAP/VM_PFNMAP users. To make sure they do not now CRASH. For example current DAX code (which this is for) would crash. If we would want to reuse page_mkwrite, We will need to first patch all users, so to not-crash-on-no-page. Then enable this patch. But even if I did that I would not sleep so well at night. Adding a new vector is the safest thing to do, and is not that expensive. an extra pointer at a static function vector per driver. Also the new vector is better for performance, because else we Will call all current Kernel vectors, so to: check-ha-no-page-do-nothing and return. No need to call it from do_shared_fault because do_wp_page is called to change pte permissions anyway. Signed-off-by: Yigal Korman Signed-off-by: Boaz Harrosh Acked-by: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Jan Kara Cc: Hugh Dickins Cc: Mel Gorman Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e7bb2194da5..8b086070c3a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -251,6 +251,9 @@ struct vm_operations_struct { * writable, if an error is returned it will cause a SIGBUS */ int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ + int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware */ -- cgit v1.2.3 From 0e3b210ce1722168227cb3bc7746256d0c0afece Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 15 Apr 2015 16:15:14 -0700 Subject: dax: use pfn_mkwrite to update c/mtime + freeze protection From: Yigal Korman [v1] Without this patch, c/mtime is not updated correctly when mmap'ed page is first read from and then written to. A new xfstest is submitted for testing this (generic/080) [v2] Jan Kara has pointed out that if we add the sb_start/end_pagefault pair in the new pfn_mkwrite we are then fixing another bug where: A user could start writing to the page while filesystem is frozen. Signed-off-by: Yigal Korman Signed-off-by: Boaz Harrosh Reviewed-by: Jan Kara Cc: Matthew Wilcox Cc: Dave Chinner Cc: Hugh Dickins Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 60733bdc74b4..0f696328f218 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2620,6 +2620,7 @@ int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 312fcae227037619dc858c9ccd362c7b847730a2 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Apr 2015 16:15:30 -0700 Subject: zsmalloc: support compaction This patch provides core functions for migration of zsmalloc. Migraion policy is simple as follows. for each size class { while { src_page = get zs_page from ZS_ALMOST_EMPTY if (!src_page) break; dst_page = get zs_page from ZS_ALMOST_FULL if (!dst_page) dst_page = get zs_page from ZS_ALMOST_EMPTY if (!dst_page) break; migrate(from src_page, to dst_page); } } For migration, we need to identify which objects in zspage are allocated to migrate them out. We could know it by iterating of freed objects in a zspage because first_page of zspage keeps free objects singly-linked list but it's not efficient. Instead, this patch adds a tag(ie, OBJ_ALLOCATED_TAG) in header of each object(ie, handle) so we could check whether the object is allocated easily. This patch adds another status bit in handle to synchronize between user access through zs_map_object and migration. During migration, we cannot move objects user are using due to data coherency between old object and new object. [akpm@linux-foundation.org: zsmalloc.c needs sched.h for cond_resched()] Signed-off-by: Minchan Kim Cc: Juneho Choi Cc: Gunho Lee Cc: Luigi Semenzato Cc: Dan Streetman Cc: Seth Jennings Cc: Nitin Gupta Cc: Jerome Marchand Cc: Sergey Senozhatsky Cc: Joonsoo Kim Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zsmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 3283c6a55425..1338190b5478 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -47,5 +47,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, void zs_unmap_object(struct zs_pool *pool, unsigned long handle); unsigned long zs_get_total_pages(struct zs_pool *pool); +unsigned long zs_compact(struct zs_pool *pool); #endif -- cgit v1.2.3 From 23f40a94d860449f39f00c3350bf850d15983e63 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 15 Apr 2015 16:16:33 -0700 Subject: include/linux: remove empty conditionals Commit 607ca46e97a1 ("UAPI: (Scripted) Disintegrate include/linux") left behind some empty conditional blocks. Since they are useless and may cause a reader to wonder whether something is missing, remove them. Signed-off-by: Rasmus Villemoes Cc: Geert Uytterhoeven Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/a.out.h | 67 --------------------------------------------------- include/linux/types.h | 6 ----- 2 files changed, 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/a.out.h b/include/linux/a.out.h index 220f14338895..ee884168989f 100644 --- a/include/linux/a.out.h +++ b/include/linux/a.out.h @@ -4,44 +4,6 @@ #include #ifndef __ASSEMBLY__ -#if defined (M_OLDSUN2) -#else -#endif -#if defined (M_68010) -#else -#endif -#if defined (M_68020) -#else -#endif -#if defined (M_SPARC) -#else -#endif -#if !defined (N_MAGIC) -#endif -#if !defined (N_BADMAG) -#endif -#if !defined (N_TXTOFF) -#endif -#if !defined (N_DATOFF) -#endif -#if !defined (N_TRELOFF) -#endif -#if !defined (N_DRELOFF) -#endif -#if !defined (N_SYMOFF) -#endif -#if !defined (N_STROFF) -#endif -#if !defined (N_TXTADDR) -#endif -#if defined(vax) || defined(hp300) || defined(pyr) -#endif -#ifdef sony -#endif /* Sony. */ -#ifdef is68k -#endif -#if defined(m68k) && defined(PORTAR) -#endif #ifdef linux #include #if defined(__i386__) || defined(__mc68000__) @@ -51,34 +13,5 @@ #endif #endif #endif -#ifndef N_DATADDR -#endif -#if !defined (N_BSSADDR) -#endif -#if !defined (N_NLIST_DECLARED) -#endif /* no N_NLIST_DECLARED. */ -#if !defined (N_UNDF) -#endif -#if !defined (N_ABS) -#endif -#if !defined (N_TEXT) -#endif -#if !defined (N_DATA) -#endif -#if !defined (N_BSS) -#endif -#if !defined (N_FN) -#endif -#if !defined (N_EXT) -#endif -#if !defined (N_TYPE) -#endif -#if !defined (N_STAB) -#endif -#if !defined (N_RELOCATION_INFO_DECLARED) -#ifdef NS32K -#else -#endif -#endif /* no N_RELOCATION_INFO_DECLARED. */ #endif /*__ASSEMBLY__ */ #endif /* __A_OUT_GNU_H__ */ diff --git a/include/linux/types.h b/include/linux/types.h index 6747247e3f9f..59698be03490 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -146,12 +146,6 @@ typedef u64 dma_addr_t; typedef u32 dma_addr_t; #endif /* dma_addr_t */ -#ifdef __CHECKER__ -#else -#endif -#ifdef __CHECK_ENDIAN__ -#else -#endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; typedef unsigned __bitwise__ oom_flags_t; -- cgit v1.2.3 From 2813893f8b197a14f1e1ddb04d99bce46817c84a Mon Sep 17 00:00:00 2001 From: Iulia Manda Date: Wed, 15 Apr 2015 16:16:41 -0700 Subject: kernel: conditionally support non-root users, groups and capabilities There are a lot of embedded systems that run most or all of their functionality in init, running as root:root. For these systems, supporting multiple users is not necessary. This patch adds a new symbol, CONFIG_MULTIUSER, that makes support for non-root users, non-root groups, and capabilities optional. It is enabled under CONFIG_EXPERT menu. When this symbol is not defined, UID and GID are zero in any possible case and processes always have all capabilities. The following syscalls are compiled out: setuid, setregid, setgid, setreuid, setresuid, getresuid, setresgid, getresgid, setgroups, getgroups, setfsuid, setfsgid, capget, capset. Also, groups.c is compiled out completely. In kernel/capability.c, capable function was moved in order to avoid adding two ifdef blocks. This change saves about 25 KB on a defconfig build. The most minimal kernels have total text sizes in the high hundreds of kB rather than low MB. (The 25k goes down a bit with allnoconfig, but not that much. The kernel was booted in Qemu. All the common functionalities work. Adding users/groups is not possible, failing with -ENOSYS. Bloat-o-meter output: add/remove: 7/87 grow/shrink: 19/397 up/down: 1675/-26325 (-24650) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Iulia Manda Reviewed-by: Josh Triplett Acked-by: Geert Uytterhoeven Tested-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 29 +++++++++++++++++++++++++++++ include/linux/cred.h | 23 +++++++++++++++++++---- include/linux/uidgid.h | 12 ++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index aa93e5ef594c..af9f0b9e80e6 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -205,6 +205,7 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, cap_intersect(permitted, __cap_nfsd_set)); } +#ifdef CONFIG_MULTIUSER extern bool has_capability(struct task_struct *t, int cap); extern bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap); @@ -213,6 +214,34 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); +#else +static inline bool has_capability(struct task_struct *t, int cap) +{ + return true; +} +static inline bool has_ns_capability(struct task_struct *t, + struct user_namespace *ns, int cap) +{ + return true; +} +static inline bool has_capability_noaudit(struct task_struct *t, int cap) +{ + return true; +} +static inline bool has_ns_capability_noaudit(struct task_struct *t, + struct user_namespace *ns, int cap) +{ + return true; +} +static inline bool capable(int cap) +{ + return true; +} +static inline bool ns_capable(struct user_namespace *ns, int cap) +{ + return true; +} +#endif /* CONFIG_MULTIUSER */ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); diff --git a/include/linux/cred.h b/include/linux/cred.h index 2fb2ca2127ed..8b6c083e68a7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -62,9 +62,27 @@ do { \ groups_free(group_info); \ } while (0) -extern struct group_info *groups_alloc(int); extern struct group_info init_groups; +#ifdef CONFIG_MULTIUSER +extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); + +extern int in_group_p(kgid_t); +extern int in_egroup_p(kgid_t); +#else +static inline void groups_free(struct group_info *group_info) +{ +} + +static inline int in_group_p(kgid_t grp) +{ + return 1; +} +static inline int in_egroup_p(kgid_t grp) +{ + return 1; +} +#endif extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); @@ -74,9 +92,6 @@ extern bool may_setgroups(void); #define GROUP_AT(gi, i) \ ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) -extern int in_group_p(kgid_t); -extern int in_egroup_p(kgid_t); - /* * The security context of a task * diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index 2d1f9b627f91..0ee05da38899 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -29,6 +29,7 @@ typedef struct { #define KUIDT_INIT(value) (kuid_t){ value } #define KGIDT_INIT(value) (kgid_t){ value } +#ifdef CONFIG_MULTIUSER static inline uid_t __kuid_val(kuid_t uid) { return uid.val; @@ -38,6 +39,17 @@ static inline gid_t __kgid_val(kgid_t gid) { return gid.val; } +#else +static inline uid_t __kuid_val(kuid_t uid) +{ + return 0; +} + +static inline gid_t __kgid_val(kgid_t gid) +{ + return 0; +} +#endif #define GLOBAL_ROOT_UID KUIDT_INIT(0) #define GLOBAL_ROOT_GID KGIDT_INIT(0) -- cgit v1.2.3 From 96831c0a6738f88f89e7012f4df0a747514af0a0 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 15 Apr 2015 16:16:44 -0700 Subject: kernel/resource.c: remove deprecated __check_region() and friends All users of __check_region(), check_region(), and check_mem_region() are gone. We got rid of the last user in v4.0-rc1. Remove them. bloat-o-meter on x86_64 shows: add/remove: 0/3 grow/shrink: 0/0 up/down: 0/-102 (-102) function old new delta __kstrtab___check_region 15 - -15 __ksymtab___check_region 16 - -16 __check_region 71 - -71 Signed-off-by: Jakub Sitnicki Cc: Bjorn Helgaas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2c5250222278..388e3ae94f7a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -196,10 +196,8 @@ extern struct resource * __request_region(struct resource *, /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) -#define check_mem_region(start,n) __check_region(&iomem_resource, (start), (n)) #define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) -extern int __check_region(struct resource *, resource_size_t, resource_size_t); extern void __release_region(struct resource *, resource_size_t, resource_size_t); #ifdef CONFIG_MEMORY_HOTREMOVE @@ -207,12 +205,6 @@ extern int release_mem_region_adjustable(struct resource *, resource_size_t, resource_size_t); #endif -static inline int __deprecated check_region(resource_size_t s, - resource_size_t n) -{ - return __check_region(&ioport_resource, s, n); -} - /* Wrappers for managed devices */ struct device; -- cgit v1.2.3 From 7a54f46b301cfab8a0d7365aa186545f8b98f22e Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 15 Apr 2015 16:16:53 -0700 Subject: kernel/reboot.c: add orderly_reboot for graceful reboot The kernel has orderly_poweroff which allows the kernel to initiate a graceful shutdown of userspace, by running /sbin/poweroff. This adds orderly_reboot that will cause userspace to shut itself down by calling /sbin/reboot. This will be used for shutdown initiated by a system controller on platforms that do not use ACPI. orderly_reboot() should be used when the system wants to allow userspace to gracefully shut itself down. For cases where the system may imminently catch on fire, the existing emergency_restart() provides an immediate reboot without involving userspace. Signed-off-by: Joel Stanley Cc: Fabian Frederick Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Rusty Russell Cc: Jeremy Kerr Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reboot.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 67fc8fcdc4b0..a7ff409f386d 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -70,7 +70,8 @@ void ctrl_alt_del(void); #define POWEROFF_CMD_PATH_LEN 256 extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; -extern int orderly_poweroff(bool force); +extern void orderly_poweroff(bool force); +extern void orderly_reboot(void); /* * Emergency restart, callable from an interrupt handler. -- cgit v1.2.3 From 7b1460eccad0621e0c48f52bbedeb7adddc55ac9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2015 16:16:59 -0700 Subject: printk: comment pr_cont() stating it is only to continue a line KERN_CONT is nicely commented in kern_levels.h, but pr_cont() is now used more often, and it lacks the comment stating what it is used for. It can be confused as continuing the log level, but that is not its purpose. Its purpose is to continue a line that had no newline enclosed. This should be documented by pr_cont() as well. Signed-off-by: Steven Rostedt Acked-by: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index baa3f97d8ce8..9b30871c9149 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -255,6 +255,11 @@ extern asmlinkage void dump_stack(void) __cold; printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* + * Like KERN_CONT, pr_cont() should only be used when continuing + * a line with no newline ('\n') enclosed. Otherwise it defaults + * back to KERN_DEFAULT. + */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) -- cgit v1.2.3 From 41416f2330112d29f2cfa337bfc7e672bf0c2768 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 15 Apr 2015 16:17:28 -0700 Subject: lib/string_helpers.c: change semantics of string_escape_mem The current semantics of string_escape_mem are inadequate for one of its current users, vsnprintf(). If that is to honour its contract, it must know how much space would be needed for the entire escaped buffer, and string_escape_mem provides no way of obtaining that (short of allocating a large enough buffer (~4 times input string) to let it play with, and that's definitely a big no-no inside vsnprintf). So change the semantics for string_escape_mem to be more snprintf-like: Return the size of the output that would be generated if the destination buffer was big enough, but of course still only write to the part of dst it is allowed to, and (contrary to snprintf) don't do '\0'-termination. It is then up to the caller to detect whether output was truncated and to append a '\0' if desired. Also, we must output partial escape sequences, otherwise a call such as snprintf(buf, 3, "%1pE", "\123") would cause printf to write a \0 to buf[2] but leaving buf[0] and buf[1] with whatever they previously contained. This also fixes a bug in the escaped_string() helper function, which used to unconditionally pass a length of "end-buf" to string_escape_mem(); since the latter doesn't check osz for being insanely large, it would happily write to dst. For example, kasprintf(GFP_KERNEL, "something and then %pE", ...); is an easy way to trigger an oops. In test-string_helpers.c, the -ENOMEM test is replaced with testing for getting the expected return value even if the buffer is too small. We also ensure that nothing is written (by relying on a NULL pointer deref) if the output size is 0 by passing NULL - this has to work for kasprintf("%pE") to work. In net/sunrpc/cache.c, I think qword_add still has the same semantics. Someone should definitely double-check this. In fs/proc/array.c, I made the minimum possible change, but longer-term it should stop poking around in seq_file internals. [andriy.shevchenko@linux.intel.com: simplify qword_add] [andriy.shevchenko@linux.intel.com: add missed curly braces] Signed-off-by: Rasmus Villemoes Acked-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string_helpers.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 657571817260..0991913f4953 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -47,22 +47,22 @@ static inline int string_unescape_any_inplace(char *buf) #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) #define ESCAPE_HEX 0x20 -int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, +int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *esc); static inline int string_escape_mem_any_np(const char *src, size_t isz, - char **dst, size_t osz, const char *esc) + char *dst, size_t osz, const char *esc) { return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); } -static inline int string_escape_str(const char *src, char **dst, size_t sz, +static inline int string_escape_str(const char *src, char *dst, size_t sz, unsigned int flags, const char *esc) { return string_escape_mem(src, strlen(src), dst, sz, flags, esc); } -static inline int string_escape_str_any_np(const char *src, char **dst, +static inline int string_escape_str_any_np(const char *src, char *dst, size_t sz, const char *esc) { return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); -- cgit v1.2.3 From 89c1e79eb302349fcaf0697bc9116a4ff16bfeb0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 15 Apr 2015 16:17:42 -0700 Subject: linux/bitmap.h: improve BITMAP_{LAST,FIRST}_WORD_MASK The macro BITMAP_LAST_WORD_MASK can be implemented without a conditional, which will generally lead to slightly better generated code (221 bytes saved for allmodconfig-GCOV_KERNEL, ~2k with GCOV_KERNEL). As a small bonus, this also ensures that the nbits parameter is expanded exactly once. In BITMAP_FIRST_WORD_MASK, if start is signed gcc is technically allowed to assume it is positive (or divisible by BITS_PER_LONG), and hence just do the simple mask. It doesn't seem to use this, and even on an architecture like x86 where the shift only depends on the lower 5 or 6 bits, and these bits are not affected by the signedness of the expression, gcc still generates code to compute the C99 mandated value of start % BITS_PER_LONG. So just use a mask explicitly, also for consistency with BITMAP_LAST_WORD_MASK. Signed-off-by: Rasmus Villemoes Cc: Tejun Heo Reviewed-by: George Spelvin Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dbfbf4990005..be4fa5ddf36c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -172,12 +172,8 @@ extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) -#define BITMAP_LAST_WORD_MASK(nbits) \ -( \ - ((nbits) % BITS_PER_LONG) ? \ - (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ -) +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) -- cgit v1.2.3 From 1527781d228cd88af6c2f78c13a9cb43b3f69f30 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 16 Apr 2015 12:33:51 +0930 Subject: cpumask: resurrect CPU_MASK_CPU0 We removed it in 2f0f267ea072 (cpumask: remove deprecated functions.), but grep shows it still used by MIPS, and not unreasonably. Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 89558d0b56ac..27e285b92b5f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -814,4 +814,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ } } +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From e7877f52fd4a8d7012f9b0faecc047a50c132a79 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Wed, 15 Apr 2015 11:17:40 -0500 Subject: stmmac: Read tx-fifo-depth and rx-fifo-depth from the devicetree Read the tx-fifo-depth and rx-fifo-depth from the devicetree. The Synopsys stmmac controller fifos are configurable per product instance, and the fifo sizes are needed to configure certain features correctly such as flow control. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index cd63851b57f2..7f484a239f53 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,6 +114,8 @@ struct plat_stmmacenet_data { int maxmtu; int multicast_filter_bins; int unicast_filter_entries; + int tx_fifo_size; + int rx_fifo_size; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); void *(*setup)(struct platform_device *pdev); -- cgit v1.2.3 From 10b88a4b17d31a7409494b179dcb76e7ab2fcaea Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 12 Mar 2015 20:02:35 -0400 Subject: sparc: Break up monolithic iommu table/lock into finer graularity pools and lock Investigation of multithreaded iperf experiments on an ethernet interface show the iommu->lock as the hottest lock identified by lockstat, with something of the order of 21M contentions out of 27M acquisitions, and an average wait time of 26 us for the lock. This is not efficient. A more scalable design is to follow the ppc model, where the iommu_table has multiple pools, each stretching over a segment of the map, and with a separate lock for each pool. This model allows for better parallelization of the iommu map search. This patch adds the iommu range alloc/free function infrastructure. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/linux/iommu-common.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/iommu-common.h (limited to 'include/linux') diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h new file mode 100644 index 000000000000..6be5c863f329 --- /dev/null +++ b/include/linux/iommu-common.h @@ -0,0 +1,55 @@ +#ifndef _LINUX_IOMMU_COMMON_H +#define _LINUX_IOMMU_COMMON_H + +#include +#include +#include + +#define IOMMU_POOL_HASHBITS 4 +#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) + +struct iommu_pool { + unsigned long start; + unsigned long end; + unsigned long hint; + spinlock_t lock; +}; + +struct iommu_table; + +struct iommu_tbl_ops { + unsigned long (*cookie_to_index)(u64, void *); + void (*demap)(void *, unsigned long, unsigned long); + void (*reset)(struct iommu_table *); +}; + +struct iommu_table { + unsigned long page_table_map_base; + unsigned long page_table_shift; + unsigned long nr_pools; + const struct iommu_tbl_ops *iommu_tbl_ops; + unsigned long poolsize; + struct iommu_pool arena_pool[IOMMU_NR_POOLS]; + u32 flags; +#define IOMMU_HAS_LARGE_POOL 0x00000001 + struct iommu_pool large_pool; + unsigned long *map; +}; + +extern void iommu_tbl_pool_init(struct iommu_table *iommu, + unsigned long num_entries, + u32 page_table_shift, + const struct iommu_tbl_ops *iommu_tbl_ops, + bool large_pool, u32 npools); + +extern unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned int pool_hash); + +extern void iommu_tbl_range_free(struct iommu_table *iommu, + u64 dma_addr, unsigned long npages, + bool do_demap, void *demap_arg); + +#endif -- cgit v1.2.3 From 96541bac0b4e62efa42e7900d9b32e6baa9a214c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 14 Apr 2015 13:06:12 +0200 Subject: Revert "mmc: core: Convert mmc_driver to device_driver" This reverts commit 6685ac62b2f0 ("mmc: core: Convert mmc_driver to device_driver") The reverted commit went too far in simplifing the device driver parts for mmc. Let's restore the old mmc_driver to enable driver core to sooner or later to remove the ->probe(), ->remove() and ->shutdown() callbacks from the struct device_driver. Note that, the old ->suspend|resume() callbacks in the struct mmc_driver don't need to be restored, since the mmc block layer has converted to the modern system PM ops. Fixes: 6685ac62b2f0 ("mmc: core: Convert mmc_driver to device_driver") Signed-off-by: Ulf Hansson Acked-by: Jaehoon Chung --- include/linux/mmc/card.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index a6cf4c063e4e..19f0175c0afa 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -512,8 +512,18 @@ static inline int mmc_card_broken_irq_polling(const struct mmc_card *c) #define mmc_dev_to_card(d) container_of(d, struct mmc_card, dev) -extern int mmc_register_driver(struct device_driver *); -extern void mmc_unregister_driver(struct device_driver *); +/* + * MMC device driver (e.g., Flash card, I/O card...) + */ +struct mmc_driver { + struct device_driver drv; + int (*probe)(struct mmc_card *); + void (*remove)(struct mmc_card *); + void (*shutdown)(struct mmc_card *); +}; + +extern int mmc_register_driver(struct mmc_driver *); +extern void mmc_unregister_driver(struct mmc_driver *); extern void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table); -- cgit v1.2.3 From 2c57a0e233d72f8c2e2404560dcf0188ac3cf5d7 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 16 Apr 2015 12:43:13 -0700 Subject: lib: find_*_bit reimplementation This patchset does rework to find_bit function family to achieve better performance, and decrease size of text. All rework is done in patch 1. Patches 2 and 3 are about code moving and renaming. It was boot-tested on x86_64 and MIPS (big-endian) machines. Performance tests were ran on userspace with code like this: /* addr[] is filled from /dev/urandom */ start = clock(); while (ret < nbits) ret = find_next_bit(addr, nbits, ret + 1); end = clock(); printf("%ld\t", (unsigned long) end - start); On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz measurements are: (for find_next_bit, nbits is 8M, for find_first_bit - 80K) find_next_bit: find_first_bit: new current new current 26932 43151 14777 14925 26947 43182 14521 15423 26507 43824 15053 14705 27329 43759 14473 14777 26895 43367 14847 15023 26990 43693 15103 15163 26775 43299 15067 15232 27282 42752 14544 15121 27504 43088 14644 14858 26761 43856 14699 15193 26692 43075 14781 14681 27137 42969 14451 15061 ... ... find_next_bit performance gain is 35-40%; find_first_bit - no measurable difference. On ARM machine, there is arch-specific implementation for find_bit. Thanks a lot to George Spelvin and Rasmus Villemoes for hints and helpful discussions. This patch (of 3): New implementations takes less space in source file (see diffstat) and in object. For me it's 710 vs 453 bytes of text. It also shows better performance. find_last_bit description fixed due to obvious typo. [akpm@linux-foundation.org: include linux/bitmap.h, per Rasmus] Signed-off-by: Yury Norov Reviewed-by: Rasmus Villemoes Reviewed-by: George Spelvin Cc: Alexey Klimov Cc: David S. Miller Cc: Daniel Borkmann Cc: Hannes Frederic Sowa Cc: Lai Jiangshan Cc: Mark Salter Cc: AKASHI Takahiro Cc: Thomas Graf Cc: Valentin Rothberg Cc: Chris Wilson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5d858e02997f..297f5bda4fdf 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The number of bits to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); -- cgit v1.2.3 From 95d119528b0b8440a63bc13904e9873fc3a70503 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 16 Apr 2015 12:43:28 -0700 Subject: util_macros.h: add find_closest() macro This series unduplicates the code used to find the member in an array closest to 'x'. The first patch adds a macro implementing the algorithm in two flavors - for arrays sorted in ascending and descending order. The second updates Documentation/CodingStyle on the naming convention for local variables in macros resembling functions. Other three patches replace duplicated code with calls to one of these macros in some hwmon drivers. This patch (of 5): Searching for the member of an array closest to 'x' is duplicated in several places. Add a new include - util_macros.h - and two macros that implement this algorithm for arrays sorted both in ascending and descending order. Uses linear search. Signed-off-by: Bartosz Golaszewski Cc: Guenter Roeck Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/util_macros.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/util_macros.h (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h new file mode 100644 index 000000000000..d5f4fb69dba3 --- /dev/null +++ b/include/linux/util_macros.h @@ -0,0 +1,40 @@ +#ifndef _LINUX_HELPER_MACROS_H_ +#define _LINUX_HELPER_MACROS_H_ + +#define __find_closest(x, a, as, op) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + typeof(x) __fc_x = (x); \ + typeof(*a) *__fc_a = (a); \ + for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ + if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ + __fc_a[__fc_i + 1], 2)) \ + break; \ + } \ + (__fc_i); \ +}) + +/** + * find_closest - locate the closest element in a sorted array + * @x: The reference value. + * @a: The array in which to look for the closest element. Must be sorted + * in ascending order. + * @as: Size of 'a'. + * + * Returns the index of the element closest to 'x'. + */ +#define find_closest(x, a, as) __find_closest(x, a, as, <=) + +/** + * find_closest_descending - locate the closest element in a sorted array + * @x: The reference value. + * @a: The array in which to look for the closest element. Must be sorted + * in descending order. + * @as: Size of 'a'. + * + * Similar to find_closest() but 'a' is expected to be sorted in descending + * order. + */ +#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) + +#endif -- cgit v1.2.3 From f766093ecb647f5a87bfa456715abbbccee547ce Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Thu, 16 Apr 2015 12:43:45 -0700 Subject: kernel.h: implement DIV_ROUND_CLOSEST_ULL We have grown a number of different implementations of DIV_ROUND_CLOSEST_ULL throughout the kernel. Move the i915 one to kernel.h so that it can be reused. Signed-off-by: Javi Merino Reviewed-by: Jeff Epler Cc: Jani Nikula Cc: David Airlie Cc: Guenter Roeck Acked-by: Daniel Vetter Cc: "Rafael J. Wysocki" Cc: Alex Elder Cc: Antti Palosaari Cc: Javi Merino Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Mike Turquette Cc: Stephen Boyd Cc: Stephen Hemminger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d6d630d31ef3..3a5b48e52a9e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -103,6 +103,18 @@ (((__x) - ((__d) / 2)) / (__d)); \ } \ ) +/* + * Same as above but for u64 dividends. divisor must be a 32-bit + * number. + */ +#define DIV_ROUND_CLOSEST_ULL(x, divisor)( \ +{ \ + typeof(divisor) __d = divisor; \ + unsigned long long _tmp = (x) + (__d) / 2; \ + do_div(_tmp, __d); \ + _tmp; \ +} \ +) /* * Multiplies an integer by a fraction, while avoiding unnecessary -- cgit v1.2.3 From 2afe27c718b669b551895595873611ac39cc31e3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 16 Apr 2015 12:44:00 -0700 Subject: lib/bitmap.c: bitmap_[empty,full]: remove code duplication bitmap_empty() has its own implementation. But it's clearly as simple as: find_first_bit(src, nbits) == nbits The same is true for 'bitmap_full'. Signed-off-by: Yury Norov Cc: George Spelvin Cc: Alexey Klimov Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index be4fa5ddf36c..ea17cca9e685 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -283,16 +283,16 @@ static inline int bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); - else - return __bitmap_empty(src, nbits); + + return find_first_bit(src, nbits) == nbits; } static inline int bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); - else - return __bitmap_full(src, nbits); + + return find_first_zero_bit(src, nbits) == nbits; } static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) -- cgit v1.2.3 From 5281f94ae7f54d2d1a48dbc10223fd12d2aea716 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 16 Apr 2015 12:45:45 -0700 Subject: drivers/rtc/rtc-s5m.c: add support for S2MPS13 RTC The S2MPS13 RTC is almost the same as S2MPS14. The differences when updating alarm are: 1. Set WUDR+AUDR field instead of WUDR+RUDR. 2. Clear the AUDR field later (it is not auto-cleared). Signed-off-by: Krzysztof Kozlowski Cc: Alexandre Belloni Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mfd/samsung/rtc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h index b6401e7661c7..29c30ac36020 100644 --- a/include/linux/mfd/samsung/rtc.h +++ b/include/linux/mfd/samsung/rtc.h @@ -105,6 +105,8 @@ enum s2mps_rtc_reg { #define S5M_RTC_UDR_MASK (1 << S5M_RTC_UDR_SHIFT) #define S2MPS_RTC_WUDR_SHIFT 4 #define S2MPS_RTC_WUDR_MASK (1 << S2MPS_RTC_WUDR_SHIFT) +#define S2MPS13_RTC_AUDR_SHIFT 1 +#define S2MPS13_RTC_AUDR_MASK (1 << S2MPS13_RTC_AUDR_SHIFT) #define S2MPS_RTC_RUDR_SHIFT 0 #define S2MPS_RTC_RUDR_MASK (1 << S2MPS_RTC_RUDR_SHIFT) #define RTC_TCON_SHIFT 1 -- cgit v1.2.3 From 16db3d3f1170fb0efca652c9378ce7c5f5cb4232 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Thu, 16 Apr 2015 12:47:50 -0700 Subject: kernel/sysctl.c: threads-max observe limits Users can change the maximum number of threads by writing to /proc/sys/kernel/threads-max. With the patch the value entered is checked against the same limits that apply when fork_init is called. Signed-off-by: Heinrich Schuchardt Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b7361f831226..795d5fea5697 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -212,4 +212,7 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, #endif /* CONFIG_SYSCTL */ +int sysctl_max_threads(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #endif /* _LINUX_SYSCTL_H */ -- cgit v1.2.3 From 90f31d0ea88880f780574f3d0bb1a227c4c66ca3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 16 Apr 2015 12:47:56 -0700 Subject: mm: rcu-protected get_mm_exe_file() This patch removes mm->mmap_sem from mm->exe_file read side. Also it kills dup_mm_exe_file() and moves exe_file duplication into dup_mmap() where both mmap_sems are locked. [akpm@linux-foundation.org: fix comment typo] Signed-off-by: Konstantin Khlebnikov Cc: Davidlohr Bueso Cc: Al Viro Cc: Oleg Nesterov Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + include/linux/mm_types.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f4fc60727b8d..6bf7ab7c1573 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -870,6 +870,7 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } +#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 590630eb59ba..8d37e26a1007 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -429,7 +429,7 @@ struct mm_struct { #endif /* store ref to file /proc//exe symlink points to */ - struct file *exe_file; + struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif -- cgit v1.2.3 From 02d699f1f464410d5753a034dd38c76a1a1647e0 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 16 Apr 2015 12:48:38 -0700 Subject: include/linux/kconfig.h: ese macros which are already defined It is better to use macros which are already available because then there is just one location which needs to be change. [akpm@linux-foundation.org: move IS_ENABLED definition to after the IS_BUILTIN and IS_MODULE definitions] Signed-off-by: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index be342b94c640..63ca8dacec59 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -22,14 +22,6 @@ #define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0) #define ___config_enabled(__ignored, val, ...) val -/* - * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', - * 0 otherwise. - * - */ -#define IS_ENABLED(option) \ - (config_enabled(option) || config_enabled(option##_MODULE)) - /* * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 * otherwise. For boolean options, this is equivalent to @@ -43,4 +35,11 @@ */ #define IS_MODULE(option) config_enabled(option##_MODULE) +/* + * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', + * 0 otherwise. + */ +#define IS_ENABLED(option) \ + (IS_BUILTIN(option) || IS_MODULE(option)) + #endif /* __LINUX_KCONFIG_H */ -- cgit v1.2.3 From 6c8c90319c0bb1c9e0b68e721359b89ae4f28465 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 16 Apr 2015 12:49:38 -0700 Subject: proc: show locks in /proc/pid/fdinfo/X Let's show locks which are associated with a file descriptor in its fdinfo file. Currently we don't have a reliable way to determine who holds a lock. We can find some information in /proc/locks, but PID which is reported there can be wrong. For example, a process takes a lock, then forks a child and dies. In this case /proc/locks contains the parent pid, which can be reused by another process. $ cat /proc/locks ... 6: FLOCK ADVISORY WRITE 324 00:13:13431 0 EOF ... $ ps -C rpcbind PID TTY TIME CMD 332 ? 00:00:00 rpcbind $ cat /proc/332/fdinfo/4 pos: 0 flags: 0100000 mnt_id: 22 lock: 1: FLOCK ADVISORY WRITE 324 00:13:13431 0 EOF $ ls -l /proc/332/fd/4 lr-x------ 1 root root 64 Mar 5 14:43 /proc/332/fd/4 -> /run/rpcbind.lock $ ls -l /proc/324/fd/ total 0 lrwx------ 1 root root 64 Feb 27 14:50 0 -> /dev/pts/0 lrwx------ 1 root root 64 Feb 27 14:50 1 -> /dev/pts/0 lrwx------ 1 root root 64 Feb 27 14:49 2 -> /dev/pts/0 You can see that the process with the 324 pid doesn't hold the lock. This information is required for proper dumping and restoring file locks. Signed-off-by: Andrey Vagin Cc: Jonathan Corbet Cc: Alexander Viro Acked-by: Jeff Layton Acked-by: "J. Bruce Fields" Acked-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6bf7ab7c1573..c4e927358503 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1042,6 +1042,9 @@ extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int vfs_setlease(struct file *, long, struct file_lock **, void **); extern int lease_modify(struct file_lock *, int, struct list_head *); +struct files_struct; +extern void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1178,6 +1181,10 @@ static inline int lease_modify(struct file_lock *fl, int arg, { return -EINVAL; } + +struct files_struct; +static inline void show_fd_locks(struct seq_file *f, + struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3 From 569fd0ce96087283866ab8c438dac4bcf1738846 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 08:28:50 -0600 Subject: blk-mq: fix iteration of busy bitmap Commit 889fa31f00b2 was a bit too eager in reducing the loop count, so we ended up missing queues in some configurations. Ensure that our division rounds up, so that's not the case. Reported-by: Guenter Roeck Fixes: 889fa31f00b2 ("blk-mq: reduce unnecessary software queue looping") Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8210e8797c12..2056a99b92f8 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -13,7 +13,7 @@ struct blk_mq_cpu_notifier { }; struct blk_mq_ctxmap { - unsigned int map_size; + unsigned int size; unsigned int bits_per_word; struct blk_align_bitmap *map; }; -- cgit v1.2.3 From 8b86a61da37cbbcf4bd6e87fda494a59b1cf16c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2015 15:45:04 +0200 Subject: net: remove unused 'dev' argument from netif_needs_gso() In commit 04ffcb255f22 ("net: Add ndo_gso_check") Tom originally added the 'dev' argument to be able to call ndo_gso_check(). Then later, when generalizing this in commit 5f35227ea34b ("net: Generalize ndo_gso_check to ndo_features_check") Jesse removed the call to ndo_gso_check() in netif_needs_gso() by calling the new ndo_features_check() in a different place. This made the 'dev' argument unused. Remove the unused argument and go back to the code as before. Cc: Tom Herbert Cc: Jesse Gross Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5679aed660b..bcbde799ec69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3713,7 +3713,7 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb, +static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || -- cgit v1.2.3 From e60841b441e1b3d176f235f36c79158dd2ed3ed8 Mon Sep 17 00:00:00 2001 From: Kedareswara rao Appana Date: Mon, 30 Mar 2015 18:48:29 +0530 Subject: dmaengine: vdma: Fix compilation warnings This patch fixes the following compilation warnings. In file included from drivers/dma/xilinx/xilinx_vdma.c:26:0: include/linux/dmapool.h:18:4: warning: 'struct device' declared inside parameter list size_t size, size_t align, size_t allocation); ^ include/linux/dmapool.h:18:4: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/dmapool.h:31:7: warning: 'struct device' declared inside parameter list size_t size, size_t align, size_t allocation); ^ drivers/dma/xilinx/xilinx_vdma.c: In function 'xilinx_vdma_alloc_chan_resources': drivers/dma/xilinx/xilinx_vdma.c:501:20: warning: passing argument 2 of 'dma_pool_create' from incompatible pointer type chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool", ^ In file included from drivers/dma/xilinx/xilinx_vdma.c:26:0: include/linux/dmapool.h:17:18: note: expected 'struct device *' but argument is of type 'struct device *' struct dma_pool *dma_pool_create(const char *name, struct device *dev, . Signed-off-by: Kedareswara rao Appana Signed-off-by: Vinod Koul --- include/linux/dmapool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 022e34fcbd1b..52456aa566a0 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -14,6 +14,8 @@ #include #include +struct device; + struct dma_pool *dma_pool_create(const char *name, struct device *dev, size_t size, size_t align, size_t allocation); -- cgit v1.2.3 From c12f048ffdf3a5802239426dc290290929268dc9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 18 Apr 2015 12:31:25 -0700 Subject: sparc: Revert generic IOMMU allocator. I applied the wrong version of this patch series, V4 instead of V10, due to a patchwork bundling snafu. Signed-off-by: David S. Miller --- include/linux/iommu-common.h | 55 -------------------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 include/linux/iommu-common.h (limited to 'include/linux') diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h deleted file mode 100644 index 6be5c863f329..000000000000 --- a/include/linux/iommu-common.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _LINUX_IOMMU_COMMON_H -#define _LINUX_IOMMU_COMMON_H - -#include -#include -#include - -#define IOMMU_POOL_HASHBITS 4 -#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) - -struct iommu_pool { - unsigned long start; - unsigned long end; - unsigned long hint; - spinlock_t lock; -}; - -struct iommu_table; - -struct iommu_tbl_ops { - unsigned long (*cookie_to_index)(u64, void *); - void (*demap)(void *, unsigned long, unsigned long); - void (*reset)(struct iommu_table *); -}; - -struct iommu_table { - unsigned long page_table_map_base; - unsigned long page_table_shift; - unsigned long nr_pools; - const struct iommu_tbl_ops *iommu_tbl_ops; - unsigned long poolsize; - struct iommu_pool arena_pool[IOMMU_NR_POOLS]; - u32 flags; -#define IOMMU_HAS_LARGE_POOL 0x00000001 - struct iommu_pool large_pool; - unsigned long *map; -}; - -extern void iommu_tbl_pool_init(struct iommu_table *iommu, - unsigned long num_entries, - u32 page_table_shift, - const struct iommu_tbl_ops *iommu_tbl_ops, - bool large_pool, u32 npools); - -extern unsigned long iommu_tbl_range_alloc(struct device *dev, - struct iommu_table *iommu, - unsigned long npages, - unsigned long *handle, - unsigned int pool_hash); - -extern void iommu_tbl_range_free(struct iommu_table *iommu, - u64 dma_addr, unsigned long npages, - bool do_demap, void *demap_arg); - -#endif -- cgit v1.2.3 From ff7d37a502022149655c18035b99a53391be0383 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 9 Apr 2015 15:33:30 -0400 Subject: Break up monolithic iommu table/lock into finer graularity pools and lock Investigation of multithreaded iperf experiments on an ethernet interface show the iommu->lock as the hottest lock identified by lockstat, with something of the order of 21M contentions out of 27M acquisitions, and an average wait time of 26 us for the lock. This is not efficient. A more scalable design is to follow the ppc model, where the iommu_map_table has multiple pools, each stretching over a segment of the map, and with a separate lock for each pool. This model allows for better parallelization of the iommu map search. This patch adds the iommu range alloc/free function infrastructure. Signed-off-by: Sowmini Varadhan Acked-by: Benjamin Herrenschmidt Signed-off-by: David S. Miller --- include/linux/iommu-common.h | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/linux/iommu-common.h (limited to 'include/linux') diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h new file mode 100644 index 000000000000..bbced83b32ee --- /dev/null +++ b/include/linux/iommu-common.h @@ -0,0 +1,51 @@ +#ifndef _LINUX_IOMMU_COMMON_H +#define _LINUX_IOMMU_COMMON_H + +#include +#include +#include + +#define IOMMU_POOL_HASHBITS 4 +#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) + +struct iommu_pool { + unsigned long start; + unsigned long end; + unsigned long hint; + spinlock_t lock; +}; + +struct iommu_map_table { + unsigned long table_map_base; + unsigned long table_shift; + unsigned long nr_pools; + void (*lazy_flush)(struct iommu_map_table *); + unsigned long poolsize; + struct iommu_pool pools[IOMMU_NR_POOLS]; + u32 flags; +#define IOMMU_HAS_LARGE_POOL 0x00000001 +#define IOMMU_NO_SPAN_BOUND 0x00000002 +#define IOMMU_NEED_FLUSH 0x00000004 + struct iommu_pool large_pool; + unsigned long *map; +}; + +extern void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check); + +extern unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order); + +extern void iommu_tbl_range_free(struct iommu_map_table *iommu, + u64 dma_addr, unsigned long npages, + unsigned long entry); + +#endif -- cgit v1.2.3 From 3ef650d3989677bd460497f9c3b0d58caaba0116 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 23 Mar 2015 13:35:03 -0700 Subject: libceph: osdmap.h: Add missing format newlines To avoid possible interleaving, add missing '\n' to formats. Convert pr_warning to pr_warn while there. Signed-off-by: Joe Perches Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 561ea896c657..e55c08bc3a96 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) __u8 version; if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { - pr_warning("incomplete pg encoding"); - + pr_warn("incomplete pg encoding\n"); return -EINVAL; } version = ceph_decode_8(p); if (version > 1) { - pr_warning("do not understand pg encoding %d > 1", + pr_warn("do not understand pg encoding %d > 1\n", (int)version); return -EINVAL; } -- cgit v1.2.3 From ff40f9ae95917b72b6acb6057471c99054b6ee24 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Mar 2015 21:02:16 +0300 Subject: libceph, ceph: split ceph_show_options() Split ceph_show_options() into two pieces and move the piece responsible for printing client (libceph) options into net/ceph. This way people adding a libceph option wouldn't have to remember to update code in fs/ceph. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 16fff9608848..303be6ef7f94 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -191,6 +191,7 @@ extern struct ceph_options *ceph_parse_options(char *options, const char *dev_name, const char *dev_name_end, int (*parse_extra_token)(char *c, void *private), void *private); +int ceph_print_client_options(struct seq_file *m, struct ceph_client *client); extern void ceph_destroy_options(struct ceph_options *opt); extern int ceph_compare_options(struct ceph_options *new_opt, struct ceph_client *client); -- cgit v1.2.3 From 5cf7bd30120ead3db43ef9074be38018d9acf22f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Mar 2015 21:07:41 +0300 Subject: libceph: expose client options through debugfs Add a client_options attribute for showing libceph options. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 303be6ef7f94..30f92cefaa72 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -135,6 +135,7 @@ struct ceph_client { struct dentry *debugfs_dir; struct dentry *debugfs_monmap; struct dentry *debugfs_osdmap; + struct dentry *debugfs_options; #endif }; -- cgit v1.2.3 From 9571eb4f9617e89b3f979a3856b1296eba277bb1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Mar 2015 21:15:17 +0300 Subject: libceph: simplify our debugfs attr macro No need to do single_open()'s job ourselves. Signed-off-by: Ilya Dryomov --- include/linux/ceph/debugfs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h index 1df086d7882d..29cf897cc5cd 100644 --- a/include/linux/ceph/debugfs.h +++ b/include/linux/ceph/debugfs.h @@ -7,13 +7,7 @@ #define CEPH_DEFINE_SHOW_FUNC(name) \ static int name##_open(struct inode *inode, struct file *file) \ { \ - struct seq_file *sf; \ - int ret; \ - \ - ret = single_open(file, name, NULL); \ - sf = file->private_data; \ - sf->private = inode->i_private; \ - return ret; \ + return single_open(file, name, inode->i_private); \ } \ \ static const struct file_operations name##_fops = { \ -- cgit v1.2.3 From f4d03bd143c628b00f66cc2ec2c013767bdd1518 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Apr 2015 09:08:49 -0700 Subject: smp: don't use 16-bit words for atomic accesses Yes, it should work, but it's a bad idea. Not only did ARM64 not have the 16-bit access code (there's a separate patch to add it), it's just not a good atomic type. Some architectures fundamentally don't do atomic accesses in them (alpha), and it's not like it saves any space here anyway because of structure packing issues. We normally should aim for flags to be "unsigned int" or "unsigned long". And if space is at a premium, use a single byte (although that causes problems on alpha again). There might be very special cases where a 16-byte entity is really wanted, but this is not one of them. Signed-off-by: Linus Torvalds --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index be91db2a7017..c4414074bd88 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -18,7 +18,7 @@ struct call_single_data { struct llist_node llist; smp_call_func_t func; void *info; - u16 flags; + unsigned int flags; }; /* total number of cpus in this system (may exceed NR_CPUS) */ -- cgit v1.2.3 From 4e18b9adf2f910ec4d30b811a74a5b626e6c6125 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 20 Apr 2015 14:10:04 -0700 Subject: net: add skb_checksum_complete_unset This function changes ip_summed to CHECKSUM_NONE if CHECKSUM_COMPLETE is set. This is called to discard checksum-complete when packet is being modified and checksum is not pulled for headers in a layer. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0991259643d6..06793b598f44 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3016,6 +3016,18 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, */ #define CHECKSUM_BREAK 76 +/* Unset checksum-complete + * + * Unset checksum complete can be done when packet is being modified + * (uncompressed for instance) and checksum-complete value is + * invalidated. + */ +static inline void skb_checksum_complete_unset(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; +} + /* Validate (init) checksum based on checksum complete. * * Return values: -- cgit v1.2.3 From d0e83059a6c9b04f00264a74b8f6439948de4613 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Apr 2015 13:39:03 +0800 Subject: crypto: rng - Convert crypto_rng to new style crypto_type This patch converts the top-level crypto_rng to the "new" style. It was the last algorithm type added before we switched over to the new way of doing things exemplified by shash. All users will automatically switch over to the new interface. Note that this patch does not touch the low-level interface to rng implementations. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 10df5d2d093a..781f7d546020 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -655,19 +655,12 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; -struct rng_tfm { - int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen); - int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); -}; - #define crt_ablkcipher crt_u.ablkcipher #define crt_aead crt_u.aead #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash #define crt_compress crt_u.compress -#define crt_rng crt_u.rng struct crypto_tfm { @@ -680,7 +673,6 @@ struct crypto_tfm { struct cipher_tfm cipher; struct hash_tfm hash; struct compress_tfm compress; - struct rng_tfm rng; } crt_u; void (*exit)(struct crypto_tfm *tfm); @@ -714,10 +706,6 @@ struct crypto_hash { struct crypto_tfm base; }; -struct crypto_rng { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, -- cgit v1.2.3 From d8fbe341beb617ebb22b98fb893e4aa32ae2d864 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Fri, 23 Jan 2015 12:53:43 +0530 Subject: dma-buf: cleanup dma_buf_export() to make it easily extensible At present, dma_buf_export() takes a series of parameters, which makes it difficult to add any new parameters for exporters, if required. Make it simpler by moving all these parameters into a struct, and pass the struct * as parameter to dma_buf_export(). While at it, unite dma_buf_export_named() with dma_buf_export(), and change all callers accordingly. Reviewed-by: Maarten Lankhorst Reviewed-by: Daniel Thompson Acked-by: Mauro Carvalho Chehab Acked-by: Dave Airlie Signed-off-by: Sumit Semwal --- include/linux/dma-buf.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 694e1fe1c4b4..2f0b431b73e0 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -162,6 +162,33 @@ struct dma_buf_attachment { void *priv; }; +/** + * struct dma_buf_export_info - holds information needed to export a dma_buf + * @exp_name: name of the exporting module - useful for debugging. + * @ops: Attach allocator-defined dma buf ops to the new buffer + * @size: Size of the buffer + * @flags: mode flags for the file + * @resv: reservation-object, NULL to allocate default one + * @priv: Attach private data of allocator to this buffer + * + * This structure holds the information required to export the buffer. Used + * with dma_buf_export() only. + */ +struct dma_buf_export_info { + const char *exp_name; + const struct dma_buf_ops *ops; + size_t size; + int flags; + struct reservation_object *resv; + void *priv; +}; + +/** + * helper macro for exporters; zeros and fills in most common values + */ +#define DEFINE_DMA_BUF_EXPORT_INFO(a) \ + struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME } + /** * get_dma_buf - convenience wrapper for get_file. * @dmabuf: [in] pointer to dma_buf @@ -181,12 +208,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *dmabuf_attach); -struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops, - size_t size, int flags, const char *, - struct reservation_object *); - -#define dma_buf_export(priv, ops, size, flags, resv) \ - dma_buf_export_named(priv, ops, size, flags, KBUILD_MODNAME, resv) +struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info); int dma_buf_fd(struct dma_buf *dmabuf, int flags); struct dma_buf *dma_buf_get(int fd); -- cgit v1.2.3 From bff175238a2416110e2258989c462234baeb5f46 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 27 Mar 2015 17:50:00 +0100 Subject: uapi: Remove kernel internal declaration The enum nfs4_acl_whotype is only used in nfs4d's internal nfs4 acl representation. No longer expose it to user space. Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ed43cb74b11d..32201c269890 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -16,6 +16,13 @@ #include #include +enum nfs4_acl_whotype { + NFS4_ACL_WHO_NAMED = 0, + NFS4_ACL_WHO_OWNER, + NFS4_ACL_WHO_GROUP, + NFS4_ACL_WHO_EVERYONE, +}; + struct nfs4_ace { uint32_t type; uint32_t flag; -- cgit v1.2.3 From fe5cbc6e06c7d8b3a86f6f5491d74766bb5c2827 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:04 +1100 Subject: md/raid6 algorithms: delta syndrome functions v3: s-o-b comment, explanation of performance and descision for the start/stop implementation Implementing rmw functionality for RAID6 requires optimized syndrome calculation. Up to now we can only generate a complete syndrome. The target P/Q pages are always overwritten. With this patch we provide a framework for inplace P/Q modification. In the first place simply fill those functions with NULL values. xor_syndrome() has two additional parameters: start & stop. These will indicate the first and last page that are changing during a rmw run. That makes it possible to avoid several unneccessary loops and speed up calculation. The caller needs to implement the following logic to make the functions work. 1) xor_syndrome(disks, start, stop, ...): "Remove" all data of source blocks inside P/Q between (and including) start and end. 2) modify any block with start <= block <= stop 3) xor_syndrome(disks, start, stop, ...): "Reinsert" all data of source blocks into P/Q between (and including) start and end. Pages between start and stop that won't be changed should be filled with a pointer to the kernel zero page. The reasons for not taking NULL pages are: 1) Algorithms cross the whole source data line by line. Thus avoid additional branches. 2) Having a NULL page avoids calculating the XOR P parity but still need calulation steps for the Q parity. Depending on the algorithm unrolling that might be only a difference of 2 instructions per loop. The benchmark numbers of the gen_syndrome() functions are displayed in the kernel log. Do the same for the xor_syndrome() functions. This will help to analyze performance problems and give an rough estimate how well the algorithm works. The choice of the fastest algorithm will still depend on the gen_syndrome() performance. With the start/stop page implementation the speed can vary a lot in real life. E.g. a change of page 0 & page 15 on a stripe will be harder to compute than the case where page 0 & page 1 are XOR candidates. To be not to enthusiatic about the expected speeds we will run a worse case test that simulates a change on the upper half of the stripe. So we do: 1) calculation of P/Q for the upper pages 2) continuation of Q for the lower (empty) pages Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 73069cb6c54a..a7a06d1dcf9c 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -72,6 +72,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; /* Routine choices */ struct raid6_calls { void (*gen_syndrome)(int, size_t, void **); + void (*xor_syndrome)(int, int, int, size_t, void **); int (*valid)(void); /* Returns 1 if this routine set is usable */ const char *name; /* Name of this routine set */ int prefer; /* Has special performance attribute */ -- cgit v1.2.3 From 584acdd49cd2472ca0f5a06adbe979db82d0b4af Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Mon, 15 Dec 2014 12:57:05 +1100 Subject: md/raid5: activate raid6 rmw feature Glue it altogehter. The raid6 rmw path should work the same as the already existing raid5 logic. So emulate the prexor handling/flags and split functions as needed. 1) Enable xor_syndrome() in the async layer. 2) Split ops_run_prexor() into RAID4/5 and RAID6 logic. Xor the syndrome at the start of a rmw run as we did it before for the single parity. 3) Take care of rmw run in ops_run_reconstruct6(). Again process only the changed pages to get syndrome back into sync. 4) Enhance set_syndrome_sources() to fill NULL pages if we are in a rmw run. The lower layers will calculate start & end pages from that and call the xor_syndrome() correspondingly. 5) Adapt the several places where we ignored Q handling up to now. Performance numbers for a single E5630 system with a mix of 10 7200k desktop/server disks. 300 seconds random write with 8 threads onto a 3,2TB (10*400GB) RAID6 64K chunk without spare (group_thread_cnt=4) bsize rmw_level=1 rmw_level=0 rmw_level=1 rmw_level=0 skip_copy=1 skip_copy=1 skip_copy=0 skip_copy=0 4K 115 KB/s 141 KB/s 165 KB/s 140 KB/s 8K 225 KB/s 275 KB/s 324 KB/s 274 KB/s 16K 434 KB/s 536 KB/s 640 KB/s 534 KB/s 32K 751 KB/s 1,051 KB/s 1,234 KB/s 1,045 KB/s 64K 1,339 KB/s 1,958 KB/s 2,282 KB/s 1,962 KB/s 128K 2,673 KB/s 3,862 KB/s 4,113 KB/s 3,898 KB/s 256K 7,685 KB/s 7,539 KB/s 7,557 KB/s 7,638 KB/s 512K 19,556 KB/s 19,558 KB/s 19,652 KB/s 19,688 Kb/s Signed-off-by: Markus Stockhausen Signed-off-by: NeilBrown --- include/linux/async_tx.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 179b38ffd351..388574ea38ed 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -60,12 +60,15 @@ struct dma_chan_ref { * dependency chain * @ASYNC_TX_FENCE: specify that the next operation in the dependency * chain uses this operation's result as an input + * @ASYNC_TX_PQ_XOR_DST: do not overwrite the syndrome but XOR it with the + * input data. Required for rmw case. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), ASYNC_TX_ACK = (1 << 2), ASYNC_TX_FENCE = (1 << 3), + ASYNC_TX_PQ_XOR_DST = (1 << 4), }; /** -- cgit v1.2.3 From acec27ff35af9caf34d76d16ee17ff3b292e7d83 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Apr 2015 10:46:38 +0800 Subject: crypto: rng - Convert low-level crypto_rng to new style This patch converts the low-level crypto_rng interface to the "new" style. This allows existing implementations to be converted over one- by-one. Once that is complete we can then remove the old rng interface. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 781f7d546020..2fa9b05360f7 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -427,7 +427,7 @@ struct compress_alg { }; /** - * struct rng_alg - random number generator definition + * struct old_rng_alg - random number generator definition * @rng_make_random: The function defined by this variable obtains a random * number. The random number generator transform must generate * the random number out of the context provided with this @@ -445,7 +445,7 @@ struct compress_alg { * seeding is implemented internally without the need of support by * the consumer. In this case, the seed size is set to zero. */ -struct rng_alg { +struct old_rng_alg { int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen); int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); @@ -559,7 +559,7 @@ struct crypto_alg { struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct compress_alg compress; - struct rng_alg rng; + struct old_rng_alg rng; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); -- cgit v1.2.3 From 94f1bb15bed84ad6c893916b7e7b9db6f1d7eec6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Apr 2015 10:46:46 +0800 Subject: crypto: rng - Remove old low-level rng interface Now that all rng implementations have switched over to the new interface, we can remove the old low-level interface. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 2fa9b05360f7..ee14140f8893 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -138,7 +138,6 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; -struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -426,40 +425,12 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; -/** - * struct old_rng_alg - random number generator definition - * @rng_make_random: The function defined by this variable obtains a random - * number. The random number generator transform must generate - * the random number out of the context provided with this - * call. - * @rng_reset: Reset of the random number generator by clearing the entire state. - * With the invocation of this function call, the random number - * generator shall completely reinitialize its state. If the random - * number generator requires a seed for setting up a new state, - * the seed must be provided by the consumer while invoking this - * function. The required size of the seed is defined with - * @seedsize . - * @seedsize: The seed size required for a random number generator - * initialization defined with this variable. Some random number - * generators like the SP800-90A DRBG does not require a seed as the - * seeding is implemented internally without the need of support by - * the consumer. In this case, the seed size is set to zero. - */ -struct old_rng_alg { - int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen); - int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); - - unsigned int seedsize; -}; - #define cra_ablkcipher cra_u.ablkcipher #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_compress cra_u.compress -#define cra_rng cra_u.rng /** * struct crypto_alg - definition of a cryptograpic cipher algorithm @@ -559,7 +530,6 @@ struct crypto_alg { struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct compress_alg compress; - struct old_rng_alg rng; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); -- cgit v1.2.3 From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Tue, 21 Apr 2015 12:33:11 -0700 Subject: perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..a59385852233 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v1.2.3 From 4796cf9b02b5bea141632e21d64556a7eb883a65 Mon Sep 17 00:00:00 2001 From: Yingjoe Chen Date: Fri, 10 Apr 2015 21:55:50 +0800 Subject: time: Remove nonexistent function prototype The function clocksource_get_next() was removed in commit 75c5158f70 (timekeeping: Update clocksource with stop_machine), but the prototype was not removed with it. Remove the prototype. Signed-off-by: Yingjoe Chen Cc: Cc: Martin Schwidefsky Cc: Cc: John Stultz Link: http://lkml.kernel.org/r/1428674150-1780-1-git-send-email-yingjoe.chen@mediatek.com Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 135509821c39..a25fc6e873b8 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -181,7 +181,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); -- cgit v1.2.3 From 91e5a2170e795989da9f90c18ba18984f23acc5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 13 Apr 2015 21:02:22 +0000 Subject: hrtimer: Document hrtimer_forward[_now]() proper Document the calling context conditions. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20150413210035.178751779@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 05f6df1fdf5b..7770676c387a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -418,7 +418,22 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); -/* Forward a hrtimer so it expires after the hrtimer's current now */ +/** + * hrtimer_forward_now - forward the timer expiry so it expires after now + * @timer: hrtimer to forward + * @interval: the interval to forward + * + * Forward the timer expiry so it will expire after the current time + * of the hrtimer clock base. Returns the number of overruns. + * + * Can be safely called from the callback function of @timer. If + * called from other contexts @timer must neither be enqueued nor + * running the callback and the caller needs to take care of + * serialization. + * + * Note: This only updates the timer expiry value and does not requeue + * the timer. + */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { -- cgit v1.2.3 From 398ca17fb54b212cdc9da7ff4a17a35c48dd2103 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:27 +0000 Subject: hrtimer: Get rid of the resolution field in hrtimer_clock_base The field has no value because all clock bases have the same resolution. The resolution only changes when we switch to high resolution timer mode. We can evaluate that from a single static variable as well. In the !HIGHRES case its simply a constant. Export the variable, so we can simplify the usage sites. Signed-off-by: Thomas Gleixner Reviewed-by: Preeti U Murthy Acked-by: Peter Zijlstra Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.645454122@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7770676c387a..bc6f91b5443b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -137,7 +137,6 @@ struct hrtimer_sleeper { * timer to a base on another cpu. * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers - * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base @@ -147,7 +146,6 @@ struct hrtimer_clock_base { int index; clockid_t clockid; struct timerqueue_head active; - ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; ktime_t offset; @@ -295,11 +293,15 @@ extern void hrtimer_peek_ahead_timers(void); extern void clock_was_set_delayed(void); +extern unsigned int hrtimer_resolution; + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES +#define hrtimer_resolution LOW_RES_NSEC + static inline void hrtimer_peek_ahead_timers(void) { } /* -- cgit v1.2.3 From 056a3cacbc46e5aca27b350ce4ecb3b33ebb0700 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:32 +0000 Subject: hrtimer: Get rid of hrtimer_get_res() The resolution is directly accessible now. So its simpler just to fill in the values of the timespec and be done with it. Text size reduction (combined with "hrtimer: Get rid of the resolution field in hrtimer_clock_base"): x8664 -61, i386 -221, ARM -60, power64 -48 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.879888080@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bc6f91b5443b..8025156c8fa1 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -385,7 +385,6 @@ static inline int hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); extern ktime_t hrtimer_get_next_event(void); -- cgit v1.2.3 From a6ffebce7f89f6f97cc22838a5d4383b15d6774f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:34 +0000 Subject: hrtimer: Make the statistics fields smaller No point in having usigned long for /proc/timer_list statistics. Make them unsigned int. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203500.959773467@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 8025156c8fa1..d39f2847754c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -187,10 +187,10 @@ struct hrtimer_cpu_base { int in_hrtirq; int hres_active; int hang_detected; - unsigned long nr_events; - unsigned long nr_retries; - unsigned long nr_hangs; - ktime_t max_hang_time; + unsigned int nr_events; + unsigned int nr_retries; + unsigned int nr_hangs; + unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; }; -- cgit v1.2.3 From 21d6d52a1b7028e6a6840bd82e354aefa9a5e203 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:35 +0000 Subject: hrtimer: Get rid of softirq time The softirq time field in the clock bases is an optimization from the early days of hrtimers. It provides a coarse "jiffies" like time mostly for self rearming timers. But that comes with a price: - Larger code size - Extra storage space - Duplicated functions with really small differences The benefit of this is optimization is marginal for contemporary systems. Consolidate everything on the high resolution timer implementation. This makes further optimizations possible. Text size reduction: x8664 -95, i386 -356, ARM -148, ARM64 -40, power64 -16 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.039977424@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d39f2847754c..e292830b58f0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -138,7 +138,6 @@ struct hrtimer_sleeper { * @clockid: clock id for per_cpu support * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock - * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -147,7 +146,6 @@ struct hrtimer_clock_base { clockid_t clockid; struct timerqueue_head active; ktime_t (*get_time)(void); - ktime_t softirq_time; ktime_t offset; }; @@ -260,19 +258,16 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->node.expires, timer->base->get_time()); } -#ifdef CONFIG_HIGH_RES_TIMERS -struct clock_event_device; - -extern void hrtimer_interrupt(struct clock_event_device *dev); - -/* - * In high resolution mode the time reference must be read accurate - */ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) { return timer->base->get_time(); } +#ifdef CONFIG_HIGH_RES_TIMERS +struct clock_event_device; + +extern void hrtimer_interrupt(struct clock_event_device *dev); + static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return timer->base->cpu_base->hres_active; @@ -304,15 +299,6 @@ extern unsigned int hrtimer_resolution; static inline void hrtimer_peek_ahead_timers(void) { } -/* - * In non high resolution mode the time reference is taken from - * the base softirq time variable. - */ -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) -{ - return timer->base->softirq_time; -} - static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; -- cgit v1.2.3 From 868a3e915f7f5eba8f8cb4f7da2276760807c51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:37 +0000 Subject: hrtimer: Make offset update smarter On every tick/hrtimer interrupt we update the offset variables of the clock bases. That's silly because these offsets change very seldom. Add a sequence counter to the time keeping code which keeps track of the offset updates (clock_was_set()). Have a sequence cache in the hrtimer cpu bases to evaluate whether the offsets must be updated or not. This allows us later to avoid pointless cacheline pollution. Signed-off-by: Thomas Gleixner Reviewed-by: Preeti U Murthy Acked-by: Peter Zijlstra Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203501.132820245@linutronix.de Signed-off-by: Thomas Gleixner Cc: John Stultz --- include/linux/hrtimer.h | 4 ++-- include/linux/timekeeper_internal.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e292830b58f0..5e04f8fc26f6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -163,7 +163,7 @@ enum hrtimer_base_type { * and timers * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set: Indicates that clock was set from irq context. + * @clock_was_set_seq: Sequence counter of clock was set events * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @in_hrtirq: hrtimer_interrupt() is currently executing @@ -179,7 +179,7 @@ struct hrtimer_cpu_base { raw_spinlock_t lock; unsigned int cpu; unsigned int active_bases; - unsigned int clock_was_set; + unsigned int clock_was_set_seq; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int in_hrtirq; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index fb86963859c7..6f8276ae579c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -49,6 +49,7 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds + * @clock_was_set_seq: The sequence number of clock was set events * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -85,6 +86,7 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; + unsigned int clock_was_set_seq; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ -- cgit v1.2.3 From e19ffe8be2cd0a1f726b235443eba21e64f6be5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:39 +0000 Subject: hrtimer: Use bits for various boolean indicators No point in wasting 12 byte storage space. Generates better code as well. Text size reduction: x8664 -64, i386 -16, ARM -132, ARM64 -0, power64 -48 Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.227955358@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5e04f8fc26f6..17a59ddcc79a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -181,10 +181,10 @@ struct hrtimer_cpu_base { unsigned int active_bases; unsigned int clock_was_set_seq; #ifdef CONFIG_HIGH_RES_TIMERS + unsigned int in_hrtirq : 1, + hres_active : 1, + hang_detected : 1; ktime_t expires_next; - int in_hrtirq; - int hres_active; - int hang_detected; unsigned int nr_events; unsigned int nr_retries; unsigned int nr_hangs; -- cgit v1.2.3 From 6d9a1411393d51f17bee3fe163430b21b2cb2de9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:42 +0000 Subject: hrtimer: Cache line align the hrtimer cpu base We really want that data structure to start at a cache line boundary. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.417597627@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 17a59ddcc79a..0853f52f8ffb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -191,7 +191,7 @@ struct hrtimer_cpu_base { unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -}; +} ____cacheline_aligned; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { -- cgit v1.2.3 From b8e38413ac2c33c497e72895fcd5da709fd1b908 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:44 +0000 Subject: hrtimer: Align the hrtimer clock bases as well We don't use cacheline_align here because that might waste lot of space on 32bit machine with 64 bytes cachelines and on 64bit machines with 128 bytes cachelines. The size of struct hrtimer_clock_base is 64byte on 64bit and 32byte on 32bit machines. So we utilize the cache lines proper. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.498165771@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0853f52f8ffb..e5c22d611850 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -130,6 +130,12 @@ struct hrtimer_sleeper { struct task_struct *task; }; +#ifdef CONFIG_64BIT +# define HRTIMER_CLOCK_BASE_ALIGN 64 +#else +# define HRTIMER_CLOCK_BASE_ALIGN 32 +#endif + /** * struct hrtimer_clock_base - the timer base for a specific clock * @cpu_base: per cpu clock base @@ -147,7 +153,7 @@ struct hrtimer_clock_base { struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; -}; +} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -195,6 +201,8 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { + BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); + timer->node.expires = time; timer->_softexpires = time; } -- cgit v1.2.3 From c320642e1ced3b81592610e374894fea995f475b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:46 +0000 Subject: timerqueue: Let timerqueue_add/del return information The hrtimer code is interested whether the added timer is the first one to expire and whether the removed timer was the last one in the tree. The add/del routines have that information already. So we can return it right away instead of reevaluating it at the call site. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203501.579063647@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timerqueue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index a520fd70a59f..7eec17ad7fa1 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -16,10 +16,10 @@ struct timerqueue_head { }; -extern void timerqueue_add(struct timerqueue_head *head, - struct timerqueue_node *node); -extern void timerqueue_del(struct timerqueue_head *head, - struct timerqueue_node *node); +extern bool timerqueue_add(struct timerqueue_head *head, + struct timerqueue_node *node); +extern bool timerqueue_del(struct timerqueue_head *head, + struct timerqueue_node *node); extern struct timerqueue_node *timerqueue_iterate_next( struct timerqueue_node *node); -- cgit v1.2.3 From 895bdfa793f6e912d1a58fc445b3dd4d686f7bd3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:49 +0000 Subject: hrtimer: Keep pointer to first timer and simplify __remove_hrtimer() __remove_hrtimer() needs to evaluate the expiry time to figure out whether the timer which is removed is eventually the first expiring timer on the cpu. Keep a pointer to it, which is lazily updated, so we can avoid the evaluation dance and retrieve the information from there. Generates slightly better code. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.752838019@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e5c22d611850..d194c1dacdaa 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -172,6 +172,7 @@ enum hrtimer_base_type { * @clock_was_set_seq: Sequence counter of clock was set events * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() + * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang @@ -180,6 +181,10 @@ enum hrtimer_base_type { * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. */ struct hrtimer_cpu_base { raw_spinlock_t lock; @@ -191,6 +196,7 @@ struct hrtimer_cpu_base { hres_active : 1, hang_detected : 1; ktime_t expires_next; + struct hrtimer *next_timer; unsigned int nr_events; unsigned int nr_retries; unsigned int nr_hangs; -- cgit v1.2.3 From c6eb3f70d4482806dc2d3e1e3c7736f497b1d418 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:51 +0000 Subject: hrtimer: Get rid of hrtimer softirq hrtimer softirq is a leftover from the initial implementation and serves only the purpose to handle the enqueueing of already expired timers in the high resolution timer mode. We discussed whether we change the return value and force all start sites to handle that the timer is already expired, but that would be a Herculean task and I'm not sure whether its a good idea to enforce that handling on everyone. A simpler solution is to enforce a timer interrupt instead of raising and scheduling a softirq. Just use the existing infrastructure to do so and remove all the softirq leftovers. The HRTIMER softirq enum is now unused, but kept around because trace parsers rely on the existing numbering. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203501.840834708@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 - include/linux/interrupt.h | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d194c1dacdaa..048270a27bc5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -459,7 +459,6 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); -extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 950ae4501826..6bf15a66bce7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -413,7 +413,8 @@ enum BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - HRTIMER_SOFTIRQ, + HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the + numbering. Sigh! */ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS -- cgit v1.2.3 From c1ad348b452aacd784fb97403d03d71723c72ee1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:08:58 +0000 Subject: tick: Nohz: Rework next timer evaluation The evaluation of the next timer in the nohz code is based on jiffies while all the tick internals are nano seconds based. We have also to convert hrtimer nanoseconds to jiffies in the !highres case. That's just wrong and introduces interesting corner cases. Turn it around and convert the next timer wheel timer expiry and the rcu event to clock monotonic and base all calculations on nanoseconds. That identifies the case where no timer is pending clearly with an absolute expiry value of KTIME_MAX. Makes the code more readable and gets rid of the jiffies magic in the nohz code. Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: Josh Triplett Cc: Lai Jiangshan Cc: John Stultz Cc: Marcelo Tosatti Link: http://lkml.kernel.org/r/20150414203502.184198593@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutree.h | 2 +- include/linux/timer.h | 7 ------- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 048270a27bc5..2c68f71ffd24 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -386,7 +386,7 @@ static inline int hrtimer_restart(struct hrtimer *timer) /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); -extern ktime_t hrtimer_get_next_event(void); +extern u64 hrtimer_get_next_event(void); /* * A timer is active, when it is enqueued into the rbtree or the diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5afd5ed8..0627a447c589 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,6 +44,8 @@ #include #include #include +#include + #include extern int rcu_expedited; /* for sysctl */ @@ -1154,9 +1156,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline int rcu_needs_cpu(unsigned long *delta_jiffies) +static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) { - *delta_jiffies = ULONG_MAX; + *nextevt = KTIME_MAX; return 0; } #endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..db2e31beaae7 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ void rcu_note_context_switch(void); #ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(unsigned long *delta_jiffies); +int rcu_needs_cpu(u64 basem, u64 *nextevt); #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); diff --git a/include/linux/timer.h b/include/linux/timer.h index 8c5a197e1587..fbb80e0030bf 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -187,13 +187,6 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base and does the comparison against the given - * jiffie. - */ -extern unsigned long get_next_timer_interrupt(unsigned long now); - /* * Timer-statistics info: */ -- cgit v1.2.3 From 58f1f803f1d6ef9ab280de13246d65970a09cb95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:08 +0000 Subject: hrtimer: Get rid of __hrtimer_start_range_ns() No more callers. Remove the leftovers. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203502.707871492@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2c68f71ffd24..a80baa86bb24 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -359,10 +359,6 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); -extern int -__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long delta_ns, - const enum hrtimer_mode mode, int wakeup); extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -- cgit v1.2.3 From 02a171af1a46966dcdb5b38cdc33e4f43e92c778 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:10 +0000 Subject: hrtimer: Make hrtimer_start() a inline wrapper No point for an extra export just to set the extra argument of hrtimer_start_range_ns() to 0. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203502.808544539@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index a80baa86bb24..42074ab3d5c3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -355,11 +355,26 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ -extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode); extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); +/** + * hrtimer_start - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} + extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -- cgit v1.2.3 From b193217e6dc3f88b599b573b53e0e0f6671d969a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:18 +0000 Subject: alarmtimer: Get rid of unused return value We want to get rid of the hrtimer_start() return value and the alarm timer return value is nowhere used. Remove it. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: John Stultz Link: http://lkml.kernel.org/r/20150414203503.243910615@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/alarmtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index a899402a5a0e..52f3b7da4f2d 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -43,8 +43,8 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); -int alarm_start(struct alarm *alarm, ktime_t start); -int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_start(struct alarm *alarm, ktime_t start); +void alarm_start_relative(struct alarm *alarm, ktime_t start); void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); -- cgit v1.2.3 From 61699e13072a89880aa584dcc64c6da465fb2ccc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2015 21:09:23 +0000 Subject: hrtimer: Remove hrtimer_start() return value No user was ever interested whether the timer was active or not when it was started. All abusers of the return value are gone, so get rid of it. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Marcelo Tosatti Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20150414203503.483556394@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 22 +++++++++------------- include/linux/interrupt.h | 6 +++--- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 42074ab3d5c3..470d876c2eda 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -355,7 +355,7 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif /* Basic timer operations: */ -extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, +extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); /** @@ -364,34 +364,30 @@ extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * @tim: expiry time * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or * relative (HRTIMER_MODE_REL) - * - * Returns: - * 0 on success - * 1 when the timer was active */ -static inline int hrtimer_start(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode) +static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) { - return hrtimer_start_range_ns(timer, tim, 0, mode); + hrtimer_start_range_ns(timer, tim, 0, mode); } extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); -static inline int hrtimer_start_expires(struct hrtimer *timer, - enum hrtimer_mode mode) +static inline void hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) { unsigned long delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); - return hrtimer_start_range_ns(timer, soft, delta, mode); + hrtimer_start_range_ns(timer, soft, delta, mode); } -static inline int hrtimer_restart(struct hrtimer *timer) +static inline void hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6bf15a66bce7..be7e75c945e9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -593,10 +593,10 @@ tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, clockid_t which_clock, enum hrtimer_mode mode); static inline -int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, - const enum hrtimer_mode mode) +void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, + const enum hrtimer_mode mode) { - return hrtimer_start(&ttimer->timer, time, mode); + hrtimer_start(&ttimer->timer, time, mode); } static inline -- cgit v1.2.3 From 0ea611a3bc5fb8f6a0bb1a76fe2dbf8ebe4bdf77 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 7 Apr 2015 15:36:32 +0800 Subject: ceph: rename snapshot support Signed-off-by: Yan, Zheng --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 31eb03d0c766..d7d072a25c27 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -323,6 +323,7 @@ enum { CEPH_MDS_OP_MKSNAP = 0x01400, CEPH_MDS_OP_RMSNAP = 0x01401, CEPH_MDS_OP_LSSNAP = 0x00402, + CEPH_MDS_OP_RENAMESNAP = 0x01403, }; extern const char *ceph_mds_op_name(int op); -- cgit v1.2.3 From 958a27658d94cf212caeb0ffb04ee0b0bc89cc40 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 14 Apr 2015 16:54:52 +0300 Subject: crush: straw2 bucket type with an efficient 64-bit crush_ln() This is an improved straw bucket that correctly avoids any data movement between items A and B when neither A nor B's weights are changed. Said differently, if we adjust the weight of item C (including adding it anew or removing it completely), we will only see inputs move to or from C, never between other items in the bucket. Notably, there is not intermediate scaling factor that needs to be calculated. The mapping function is a simple function of the item weights. The below commits were squashed together into this one (mostly to avoid adding and then yanking a ~6000 lines worth of crush_ln_table): - crush: add a straw2 bucket type - crush: add crush_ln to calculate nature log efficently - crush: improve straw2 adjustment slightly - crush: change crush_ln to provide 32 more digits - crush: fix crush_get_bucket_item_weight and bucket destroy for straw2 - crush/mapper: fix divide-by-0 in straw2 (with div64_s64() for draw = ln / w and INT64_MIN -> S64_MIN - need to create a proper compat.h in ceph.git) Reflects ceph.git commits 242293c908e923d474910f2b8203fa3b41eb5a53, 32a1ead92efcd351822d22a5fc37d159c65c1338, 6289912418c4a3597a11778bcf29ed5415117ad9, 35fcb04e2945717cf5cfe150b9fa89cb3d2303a1, 6445d9ee7290938de1e4ee9563912a6ab6d8ee5f, b5921d55d16796e12d66ad2c4add7305f9ce2353. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 4fad5f8ee01d..48a1a7d100f1 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -96,13 +96,15 @@ struct crush_rule { * uniform O(1) poor poor * list O(n) optimal poor * tree O(log n) good good - * straw O(n) optimal optimal + * straw O(n) better better + * straw2 O(n) optimal optimal */ enum { CRUSH_BUCKET_UNIFORM = 1, CRUSH_BUCKET_LIST = 2, CRUSH_BUCKET_TREE = 3, - CRUSH_BUCKET_STRAW = 4 + CRUSH_BUCKET_STRAW = 4, + CRUSH_BUCKET_STRAW2 = 5, }; extern const char *crush_bucket_alg_name(int alg); @@ -149,6 +151,11 @@ struct crush_bucket_straw { __u32 *straws; /* 16-bit fixed point */ }; +struct crush_bucket_straw2 { + struct crush_bucket h; + __u32 *item_weights; /* 16-bit fixed point */ +}; + /* @@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b); extern void crush_destroy_bucket_list(struct crush_bucket_list *b); extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); +extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b); extern void crush_destroy_bucket(struct crush_bucket *b); extern void crush_destroy_rule(struct crush_rule *r); extern void crush_destroy(struct crush_map *map); -- cgit v1.2.3 From 7c1c4747f28f59f2939687168203863f7e095d10 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 14 Apr 2015 23:13:31 +0300 Subject: libceph: announce support for straw2 buckets Sync up feature bits and enable CEPH_FEATURE_CRUSH_V4. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 71e05bbf8ceb..4763ad64e832 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -50,6 +50,19 @@ #define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) #define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ +#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42) +#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43) +#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44) +#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45) +#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46) +#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */ +#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */ +#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */ +#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47) +#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */ +#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49) +// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY +#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ - CEPH_FEATURE_OSD_PRIMARY_AFFINITY) + CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3 From f3ca10dde49043fbbb055854278b26954b549b36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 22 Apr 2015 09:44:36 -0700 Subject: Revert "mm: avoid tail page refcounting on non-THP compound pages" This reverts commit 8d63d99a5dfbdb997d12dd3c07b2070ca723db3b. It causes in VM mapping refcount errors: page:ffffea0010a15040 count:0 mapcount:1 mapping: (null) index:0x0 flags: 0x8000000000008014(referenced|dirty|tail) page dumped because: VM_BUG_ON_PAGE(page_mapcount(page) != 0) ------------[ cut here ]------------ kernel BUG at mm/swap.c:134! as reported by Borislav Petkov Reported-and-tested-by: Borislav Petkov Cc: Kirill A. Shutemov Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b086070c3a5..0755b9fd03a7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -499,7 +499,7 @@ static inline int page_count(struct page *page) static inline bool __compound_tail_refcounted(struct page *page) { - return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page); + return !PageSlab(page) && !PageHeadHuge(page); } /* -- cgit v1.2.3 From 03c57747a7020a28a200e7e920fb48ecdc9b0fb8 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Wed, 22 Apr 2015 11:14:37 +0100 Subject: mpls: Per-device MPLS state Add per-device MPLS state to supported interfaces. Use the presence of this state in mpls_route_add to determine that this is a supported interface. Use the presence of mpls_dev to drop packets that arrived on an unsupported interface - previously they were allowed through. Cc: "Eric W. Biederman" Signed-off-by: Robert Shearman Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bcbde799ec69..dae106a3a998 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,7 @@ struct phy_device; struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; +struct mpls_dev; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1627,6 +1628,9 @@ struct net_device { void *ax25_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; +#if IS_ENABLED(CONFIG_MPLS_ROUTING) + struct mpls_dev __rcu *mpls_ptr; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) -- cgit v1.2.3 From 59afdc7b32143528524455039e7557a46b60e4c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Apr 2015 11:28:46 +0800 Subject: crypto: api - Move module sig ifdef into accessor function Currently we're hiding mod->sig_ok under an ifdef in open code. This patch adds a module_sig_ok accessor function and removes that ifdef. Signed-off-by: Herbert Xu Acked-by: Rusty Russell --- include/linux/module.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index c883b86ea964..1e5436042eb0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -655,4 +655,16 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef CONFIG_MODULE_SIG +static inline bool module_sig_ok(struct module *module) +{ + return module->sig_ok; +} +#else /* !CONFIG_MODULE_SIG */ +static inline bool module_sig_ok(struct module *module) +{ + return true; +} +#endif /* CONFIG_MODULE_SIG */ + #endif /* _LINUX_MODULE_H */ -- cgit v1.2.3 From 7e01b5acd88b3f3108d8c4ce44e3205d67437202 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 16 Apr 2015 14:47:33 +0200 Subject: kexec: allocate the kexec control page with KEXEC_CONTROL_MEMORY_GFP Introduce KEXEC_CONTROL_MEMORY_GFP to allow the architecture code to override the gfp flags of the allocation for the kexec control page. The loop in kimage_alloc_normal_control_pages allocates pages with GFP_KERNEL until a page is found that happens to have an address smaller than the KEXEC_CONTROL_MEMORY_LIMIT. On systems with a large memory size but a small KEXEC_CONTROL_MEMORY_LIMIT the loop will keep allocating memory until the oom killer steps in. Signed-off-by: Martin Schwidefsky --- include/linux/kexec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e60a745ac198..e804306ef5e8 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -40,6 +40,10 @@ #error KEXEC_CONTROL_MEMORY_LIMIT not defined #endif +#ifndef KEXEC_CONTROL_MEMORY_GFP +#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#endif + #ifndef KEXEC_CONTROL_PAGE_SIZE #error KEXEC_CONTROL_PAGE_SIZE not defined #endif -- cgit v1.2.3 From ec65aafb9e3f316ff9167289e288856a7d528773 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 12:06:30 +0200 Subject: netdev_alloc_pcpu_stats: use less common iterator variable With the CPU iteration variable called 'i', it's relatively easy to have variable shadowing which sparse will warn about. Avoid that by renaming the variable to __cpu which is less likely to be used in the surrounding context. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dae106a3a998..dbad4d728b4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2025,10 +2025,10 @@ struct pcpu_sw_netstats { ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ if (pcpu_stats) { \ - int i; \ - for_each_possible_cpu(i) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, i); \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ -- cgit v1.2.3 From 9a51940bf65bf9fdc93027d70bdecdfc403c5b24 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 16 Mar 2015 14:06:23 -0400 Subject: NFS: Don't zap caches on fallocate() This patch adds a GETATTR to the end of ALLOCATE and DEALLOCATE operations so we can set the updated inode size and change attribute directly. DEALLOCATE will still need to release pagecache pages, so nfs42_proc_deallocate() now calls truncate_pagecache_range() before contacting the server. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3d88908fd140..93ab6071bbe9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1273,11 +1273,15 @@ struct nfs42_falloc_args { nfs4_stateid falloc_stateid; u64 falloc_offset; u64 falloc_length; + const u32 *falloc_bitmask; }; struct nfs42_falloc_res { struct nfs4_sequence_res seq_res; unsigned int status; + + struct nfs_fattr *falloc_fattr; + const struct nfs_server *falloc_server; }; struct nfs42_seek_args { -- cgit v1.2.3 From 3f9400981691f6845e5c22b962500742b80a5484 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Mar 2015 12:03:28 -0400 Subject: sunrpc: make debugfs file creation failure non-fatal v2: gracefully handle the case where some dentry pointers end up NULL and be more dilligent about zeroing out dentry pointers We currently have a problem that SELinux policy is being enforced when creating debugfs files. If a debugfs file is created as a side effect of doing some syscall, then that creation can fail if the SELinux policy for that process prevents it. This seems wrong. We don't do that for files under /proc, for instance, so Bruce has proposed a patch to fix that. While discussing that patch however, Greg K.H. stated: "No kernel code should care / fail if a debugfs function fails, so please fix up the sunrpc code first." This patch converts all of the sunrpc debugfs setup code to be void return functins, and the callers to not look for errors from those functions. This should allow rpc_clnt and rpc_xprt creation to work, even if the kernel fails to create debugfs files for some reason. Cc: Greg Kroah-Hartman Acked-by: "J. Bruce Fields" Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index c57d8ea0716c..59a7889e15db 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -60,17 +60,17 @@ struct rpc_xprt; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); -int sunrpc_debugfs_init(void); +void sunrpc_debugfs_init(void); void sunrpc_debugfs_exit(void); -int rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_register(struct rpc_clnt *); void rpc_clnt_debugfs_unregister(struct rpc_clnt *); -int rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_register(struct rpc_xprt *); void rpc_xprt_debugfs_unregister(struct rpc_xprt *); #else -static inline int +static inline void sunrpc_debugfs_init(void) { - return 0; + return; } static inline void @@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void) return; } -static inline int +static inline void rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - return 0; + return; } static inline void @@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) return; } -static inline int +static inline void rpc_xprt_debugfs_register(struct rpc_xprt *xprt) { - return 0; + return; } static inline void -- cgit v1.2.3 From f9ebd61855253078fe8b07bacaf516337f8078e8 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 15 Apr 2015 13:00:04 -0400 Subject: NFS: Remove CONFIG_NFS_V4 checks from nfs_idmap.h The idmapper is completely internal to the NFS v4 module, so this macro will always evaluate to true. This patch also removes unnecessary includes of this file from the generic NFS client. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_idmap.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 333844e38f66..daaf3ea5e9d8 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -46,19 +46,8 @@ struct nfs_server; struct nfs_fattr; struct nfs4_string; -#if IS_ENABLED(CONFIG_NFS_V4) int nfs_idmap_init(void); void nfs_idmap_quit(void); -#else -static inline int nfs_idmap_init(void) -{ - return 0; -} - -static inline void nfs_idmap_quit(void) -{} -#endif - int nfs_idmap_new(struct nfs_client *); void nfs_idmap_delete(struct nfs_client *); -- cgit v1.2.3 From 40c64c26a43494ba9982fd1b87dc54e3819566fc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 15 Apr 2015 13:00:05 -0400 Subject: NFS: Move nfs_idmap.h into fs/nfs/ This file is only used internally to the NFS v4 module, so it doesn't need to be in the global include path. I also renamed it from nfs_idmap.h to nfs4idmap.h to emphasize that it's an NFSv4-only include file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_idmap.h | 68 ----------------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 include/linux/nfs_idmap.h (limited to 'include/linux') diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h deleted file mode 100644 index daaf3ea5e9d8..000000000000 --- a/include/linux/nfs_idmap.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * include/linux/nfs_idmap.h - * - * UID and GID to name mapping for clients. - * - * Copyright (c) 2002 The Regents of the University of Michigan. - * All rights reserved. - * - * Marius Aamodt Eriksen - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef NFS_IDMAP_H -#define NFS_IDMAP_H - -#include -#include - - -/* Forward declaration to make this header independent of others */ -struct nfs_client; -struct nfs_server; -struct nfs_fattr; -struct nfs4_string; - -int nfs_idmap_init(void); -void nfs_idmap_quit(void); -int nfs_idmap_new(struct nfs_client *); -void nfs_idmap_delete(struct nfs_client *); - -void nfs_fattr_init_names(struct nfs_fattr *fattr, - struct nfs4_string *owner_name, - struct nfs4_string *group_name); -void nfs_fattr_free_names(struct nfs_fattr *); -void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *); - -int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *); -int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *); -int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); -int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); - -int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); - -extern unsigned int nfs_idmap_cache_timeout; -#endif /* NFS_IDMAP_H */ -- cgit v1.2.3 From af87baedf2c23b1181f51323339210a26a64f7fc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:28 +0800 Subject: x86/htirq: Use new irqdomain interfaces to allocate/free IRQ Use new irqdomain interfaces to allocate/free IRQ for HTIRQ, so we can remove GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later. This patch changes the interfaces between arch independent PCI driver and arch specific code. Currently HT_IRQ is only enabled on x86, so it does not affect other architectures. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-7-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/htirq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 70a1dbbf2093..5caa51b7b95c 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -15,6 +15,8 @@ void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); +int arch_alloc_ht_irq(struct pci_dev *dev); +void arch_free_ht_irq(int irq); /* For drivers of buggy hardware */ typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, -- cgit v1.2.3 From b106ee63abccbba5f5a52d6e43168a6a30c6d98a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:32 +0800 Subject: irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains Enhance Intel interrupt remapping driver to support hierarchical irqdomains. Implement intel_ir_chip to support stacked irq_chip. Signed-off-by: Jiang Liu Acked-by: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Joerg Roedel Cc: David Woodhouse Link: http://lkml.kernel.org/r/1428905519-23704-11-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/intel-iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a65208a8fe18..ecaf3a937845 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -286,6 +286,8 @@ struct q_inval { #define INTR_REMAP_TABLE_ENTRIES 65536 +struct irq_domain; + struct ir_table { struct irte *base; unsigned long *bitmap; @@ -335,6 +337,8 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ + struct irq_domain *ir_domain; + struct irq_domain *ir_msi_domain; #endif struct device *iommu_dev; /* IOMMU-sysfs device */ int node; -- cgit v1.2.3 From 34742db8eaf9ff364034f214ee5827701e131d4b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:41 +0800 Subject: iommu/vt-d: Refine the interfaces to create IRQ for DMAR unit Refine the interfaces to create IRQ for DMAR unit. It's a preparation for converting DMAR IRQ to hierarchical irqdomain on x86. It also moves dmar_alloc_hwirq()/dmar_free_hwirq() from irq_remapping.h to dmar.h. They are not irq_remapping specific. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Greg Kroah-Hartman Cc: iommu@lists.linux-foundation.org Cc: Vinod Koul Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Cc: Tony Luck Cc: Fenghua Yu Cc: Joerg Roedel Link: http://lkml.kernel.org/r/1428905519-23704-20-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/dmar.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 30624954dec5..84737565c1fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -227,6 +227,7 @@ extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); -extern int arch_setup_dmar_msi(unsigned int irq); +extern int dmar_alloc_hwirq(int id, int node, void *arg); +extern void dmar_free_hwirq(int irq); #endif /* __DMAR_H__ */ -- cgit v1.2.3 From 49e07d8f28c05347f237146a9ec66f6d958db83e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 13 Apr 2015 14:11:43 +0800 Subject: x86/htirq: Use hierarchical irqdomain to manage Hypertransport interrupts We have slightly changed the architecture interfaces to support htirq PCI driver. It's safe because currently Hypertransport interrupt is only enabled on x86 platforms. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: David Cohen Cc: Sander Eikelenboom Cc: David Vrabel Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Dimitri Sivanich Link: http://lkml.kernel.org/r/1428905519-23704-22-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/htirq.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 5caa51b7b95c..d4a527e58434 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,26 +1,38 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct pci_dev; +struct irq_data; + struct ht_irq_msg { u32 address_lo; /* low 32 bits of the ht irq message */ u32 address_hi; /* high 32 bits of the it irq message */ }; +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); + +struct ht_irq_cfg { + struct pci_dev *dev; + /* Update callback used to cope with buggy hardware */ + ht_irq_update_t *update; + unsigned pos; + unsigned idx; + struct ht_irq_msg msg; +}; + /* Helper functions.. */ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -struct irq_data; void mask_ht_irq(struct irq_data *data); void unmask_ht_irq(struct irq_data *data); /* The arch hook for getting things started */ -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); -int arch_alloc_ht_irq(struct pci_dev *dev); -void arch_free_ht_irq(int irq); +int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, + ht_irq_update_t *update); +void arch_teardown_ht_irq(unsigned int irq); /* For drivers of buggy hardware */ -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); #endif /* LINUX_HTIRQ_H */ -- cgit v1.2.3 From 1d8dc3d3c8f1d8ee1da9d54c5d7c8694419ade42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Apr 2015 16:38:43 +0200 Subject: rhashtable: don't attempt to grow when at max_size The conversion of mac80211's station table to rhashtable had a bug that I found by accident in code review, that hadn't been found as rhashtable apparently managed to have a maximum hash chain length of one (!) in all our testing. In order to test the bug and verify the fix I set my rhashtable's max_size very low (4) in order to force getting hash collisions. At that point, rhashtable WARNed in rhashtable_insert_rehash() but didn't actually reject the hash table insertion. This caused it to lose insertions - my master list of stations would have 9 entries, but the rhashtable only had 5. This may warrant a deeper look, but that WARN_ON() just shouldn't happen. Fix this by not returning true from rht_grow_above_100() when the rhashtable's max_size has been reached - in this case the user is explicitly configuring it to be at most that big, so even if it's now above 100% it shouldn't attempt to resize. This fixes the "lost insertion" issue and consequently allows my code to display its error (and verify my fix for it.) Signed-off-by: Johannes Berg Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e23d242d1230..dbcbcc59aa92 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -282,7 +282,8 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, static inline bool rht_grow_above_100(const struct rhashtable *ht, const struct bucket_table *tbl) { - return atomic_read(&ht->nelems) > tbl->size; + return atomic_read(&ht->nelems) > tbl->size && + (!ht->p.max_size || tbl->size < ht->p.max_size); } /* The bucket lock is selected based on the hash and protects mutations -- cgit v1.2.3 From 547c4b547e07dcc60874b6ef6252dd49ff74aec1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Apr 2015 12:35:47 +0200 Subject: netfilter: bridge: fix NULL deref in physin/out ifindex helpers Might not have an outdev yet. We'll oops when iface goes down while skbs are still nfqueue'd: RIP: 0010:[] [] dev_cmp+0x4f/0x80 nfqnl_rcv_dev_event+0xe2/0x150 notifier_call_chain+0x53/0xa0 Fixes: c737b7c4510026 ("netfilter: bridge: add helpers for fetching physin/outdev") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ab8f76dba668..f2fdb5a52070 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -39,12 +39,24 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) static inline int nf_bridge_get_physinif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0; } static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) { - return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; + struct nf_bridge_info *nf_bridge; + + if (skb->nf_bridge == NULL) + return 0; + + nf_bridge = skb->nf_bridge; + return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0; } static inline struct net_device * -- cgit v1.2.3 From 1dcc73d7bb0429994c54d33b40c5fb82b741a791 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 22 Apr 2015 18:20:04 +0100 Subject: irqchip: gic: Drop support for gic_arch_extn Now that the users of gic_arch_extn have been fixed, drop the "feature" for good. This leads to the removal of some now useless locking. Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 36ec4ae74634..9de976b4f9a7 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -95,8 +95,6 @@ struct device_node; -extern struct irq_chip gic_arch_extn; - void gic_set_irqchip_flags(unsigned long flags); void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); -- cgit v1.2.3 From fe0f07d08ee35fb13d2cb048970072fe4f71ad14 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Apr 2015 17:05:48 -0600 Subject: direct-io: only inc/dec inode->i_dio_count for file systems do_blockdev_direct_IO() increments and decrements the inode ->i_dio_count for each IO operation. It does this to protect against truncate of a file. Block devices don't need this sort of protection. For a capable multiqueue setup, this atomic int is the only shared state between applications accessing the device for O_DIRECT, and it presents a scaling wall for that. In my testing, as much as 30% of system time is spent incrementing and decrementing this value. A mixed read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with better latencies too. Before: clat percentiles (usec): | 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34], | 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35], | 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80], | 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155], | 99.99th=[ 165] After: clat percentiles (usec): | 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149], | 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171], | 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270], | 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422], | 99.99th=[ 438] In other setups, Robert Elliott reported seeing good performance improvements: https://lkml.org/lkml/2015/4/3/557 The more applications accessing the device, the worse it gets. Add a new direct-io flags, DIO_SKIP_DIO_COUNT, which tells do_blockdev_direct_IO() that it need not worry about incrementing or decrementing the inode i_dio_count for this caller. Cc: Andrew Morton Cc: Christoph Hellwig Cc: Theodore Ts'o Cc: Elliott, Robert (Server Storage) Cc: Al Viro Signed-off-by: Jens Axboe Signed-off-by: Al Viro --- include/linux/fs.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b1d7db28c13c..9055eefa92c7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2635,6 +2635,9 @@ enum { /* filesystem can handle aio writes beyond i_size */ DIO_ASYNC_EXTEND = 0x04, + + /* inode/fs/bdev does not need truncate protection */ + DIO_SKIP_DIO_COUNT = 0x08, }; void dio_end_io(struct bio *bio, int error); @@ -2657,7 +2660,31 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, #endif void inode_dio_wait(struct inode *inode); -void inode_dio_done(struct inode *inode); + +/* + * inode_dio_begin - signal start of a direct I/O requests + * @inode: inode the direct I/O happens on + * + * This is called once we've finished processing a direct I/O request, + * and is used to wake up callers waiting for direct I/O to be quiesced. + */ +static inline void inode_dio_begin(struct inode *inode) +{ + atomic_inc(&inode->i_dio_count); +} + +/* + * inode_dio_end - signal finish of a direct I/O requests + * @inode: inode the direct I/O happens on + * + * This is called once we've finished processing a direct I/O request, + * and is used to wake up callers waiting for direct I/O to be quiesced. + */ +static inline void inode_dio_end(struct inode *inode) +{ + if (atomic_dec_and_test(&inode->i_dio_count)) + wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); +} extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); -- cgit v1.2.3 From ac74d8d65c83d8061034d0908e1eab6a0c24f923 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 16 Apr 2015 15:04:56 -0500 Subject: fix I_DIO_WAKEUP definition I_DIO_WAKEUP is never directly used, but fix it up anyway. Signed-off-by: Eric Sandeen Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9055eefa92c7..43565607088e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1812,7 +1812,7 @@ struct super_operations { #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) +#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define __I_DIRTY_TIME_EXPIRED 12 -- cgit v1.2.3 From 2ea2f62c8bda242433809c7f4e9eae1c52c40bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Apr 2015 16:05:01 -0700 Subject: net: fix crash in build_skb() When I added pfmemalloc support in build_skb(), I forgot netlink was using build_skb() with a vmalloc() area. In this patch I introduce __build_skb() for netlink use, and build_skb() is a wrapper handling both skb->head_frag and skb->pfmemalloc This means netlink no longer has to hack skb->head_frag [ 1567.700067] kernel BUG at arch/x86/mm/physaddr.c:26! [ 1567.700067] invalid opcode: 0000 [#1] PREEMPT SMP KASAN [ 1567.700067] Dumping ftrace buffer: [ 1567.700067] (ftrace buffer empty) [ 1567.700067] Modules linked in: [ 1567.700067] CPU: 9 PID: 16186 Comm: trinity-c182 Not tainted 4.0.0-next-20150424-sasha-00037-g4796e21 #2167 [ 1567.700067] task: ffff880127efb000 ti: ffff880246770000 task.ti: ffff880246770000 [ 1567.700067] RIP: __phys_addr (arch/x86/mm/physaddr.c:26 (discriminator 3)) [ 1567.700067] RSP: 0018:ffff8802467779d8 EFLAGS: 00010202 [ 1567.700067] RAX: 000041000ed8e000 RBX: ffffc9008ed8e000 RCX: 000000000000002c [ 1567.700067] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffffb3fd6049 [ 1567.700067] RBP: ffff8802467779f8 R08: 0000000000000019 R09: ffff8801d0168000 [ 1567.700067] R10: ffff8801d01680c7 R11: ffffed003a02d019 R12: ffffc9000ed8e000 [ 1567.700067] R13: 0000000000000f40 R14: 0000000000001180 R15: ffffc9000ed8e000 [ 1567.700067] FS: 00007f2a7da3f700(0000) GS:ffff8801d1000000(0000) knlGS:0000000000000000 [ 1567.700067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1567.700067] CR2: 0000000000738308 CR3: 000000022e329000 CR4: 00000000000007e0 [ 1567.700067] Stack: [ 1567.700067] ffffc9000ed8e000 ffff8801d0168000 ffffc9000ed8e000 ffff8801d0168000 [ 1567.700067] ffff880246777a28 ffffffffad7c0a21 0000000000001080 ffff880246777c08 [ 1567.700067] ffff88060d302e68 ffff880246777b58 ffff880246777b88 ffffffffad9a6821 [ 1567.700067] Call Trace: [ 1567.700067] build_skb (include/linux/mm.h:508 net/core/skbuff.c:316) [ 1567.700067] netlink_sendmsg (net/netlink/af_netlink.c:1633 net/netlink/af_netlink.c:2329) [ 1567.774369] ? sched_clock_cpu (kernel/sched/clock.c:311) [ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) [ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) [ 1567.774369] sock_sendmsg (net/socket.c:614 net/socket.c:623) [ 1567.774369] sock_write_iter (net/socket.c:823) [ 1567.774369] ? sock_sendmsg (net/socket.c:806) [ 1567.774369] __vfs_write (fs/read_write.c:479 fs/read_write.c:491) [ 1567.774369] ? get_lock_stats (kernel/locking/lockdep.c:249) [ 1567.774369] ? default_llseek (fs/read_write.c:487) [ 1567.774369] ? vtime_account_user (kernel/sched/cputime.c:701) [ 1567.774369] ? rw_verify_area (fs/read_write.c:406 (discriminator 4)) [ 1567.774369] vfs_write (fs/read_write.c:539) [ 1567.774369] SyS_write (fs/read_write.c:586 fs/read_write.c:577) [ 1567.774369] ? SyS_read (fs/read_write.c:577) [ 1567.774369] ? __this_cpu_preempt_check (lib/smp_processor_id.c:63) [ 1567.774369] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2594 kernel/locking/lockdep.c:2636) [ 1567.774369] ? trace_hardirqs_on_thunk (arch/x86/lib/thunk_64.S:42) [ 1567.774369] system_call_fastpath (arch/x86/kernel/entry_64.S:261) Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve") Signed-off-by: Eric Dumazet Reported-by: Sasha Levin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 06793b598f44..66e374d62f64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -773,6 +773,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); +struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) -- cgit v1.2.3 From 8393b811f38acdf7fd8da2028708edad3e68ce1f Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Sat, 25 Apr 2015 19:52:36 +0200 Subject: libata: Add helper to determine when PHY events should be ignored This is a preparation commit that will allow to add other criteria according to which PHY events should be dropped. Signed-off-by: Gabriele Mazzotta Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8dad4a307bb8..6138d87277af 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1201,6 +1201,7 @@ extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q); +extern bool sata_lpm_ignore_phy_events(struct ata_link *link); extern int ata_cable_40wire(struct ata_port *ap); extern int ata_cable_80wire(struct ata_port *ap); -- cgit v1.2.3 From 09c5b4803a80a5451d950d6a539d2eb311dc0fb1 Mon Sep 17 00:00:00 2001 From: Gabriele Mazzotta Date: Sat, 25 Apr 2015 19:52:37 +0200 Subject: libata: Ignore spurious PHY event on LPM policy change When the LPM policy is set to ATA_LPM_MAX_POWER, the device might generate a spurious PHY event that cuases errors on the link. Ignore this event if it occured within 10s after the policy change. The timeout was chosen observing that on a Dell XPS13 9333 these spurious events can occur up to roughly 6s after the policy change. Link: http://lkml.kernel.org/g/3352987.ugV1Ipy7Z5@xps13 Signed-off-by: Gabriele Mazzotta Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- include/linux/libata.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 6138d87277af..28aeae46f355 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -205,6 +205,7 @@ enum { ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */ + ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -309,6 +310,12 @@ enum { */ ATA_TMOUT_PMP_SRST_WAIT = 5000, + /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might + * be a spurious PHY event, so ignore the first PHY event that + * occurs within 10s after the policy change. + */ + ATA_TMOUT_SPURIOUS_PHY = 10000, + /* ATA bus states */ BUS_UNKNOWN = 0, BUS_DMA = 1, @@ -788,6 +795,8 @@ struct ata_link { struct ata_eh_context eh_context; struct ata_device device[ATA_MAX_DEVICES]; + + unsigned long last_lpm_change; /* when last LPM change happened */ }; #define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag) #define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0]) -- cgit v1.2.3 From 73459e2a1ada09a68c02cc5b73f3116fc8194b3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Apr 2015 13:20:18 +0200 Subject: x86: pvclock: Really remove the sched notifier for cross-cpu migrations This reverts commits 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 and 80f7fdb1c7f0f9266421f823964fd1962681f6ce. The task migration notifier was originally introduced in order to support the pvclock vsyscall with non-synchronized TSC, but KVM only supports it with synchronized TSC. Hence, on KVM the race condition is only needed due to a bad implementation on the host side, and even then it's so rare that it's mostly theoretical. As far as KVM is concerned it's possible to fix the host, avoiding the additional complexity in the vDSO and the (re)introduction of the task migration notifier. Xen, on the other hand, hasn't yet implemented vsyscall support at all, so we do not care about its plans for non-synchronized TSC. Reported-by: Peter Zijlstra Suggested-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- include/linux/sched.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..26a2e6122734 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -175,14 +175,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); -/* Notifier for when a task gets migrated to a new CPU */ -struct task_migration_notifier { - struct task_struct *task; - int from_cpu; - int to_cpu; -}; -extern void register_task_migration_notifier(struct notifier_block *n); - extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); -- cgit v1.2.3 From ee136af4a064c2f61e2025873584d2c7ec93f4ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Apr 2015 11:20:31 +0200 Subject: uas: Add US_FL_MAX_SECTORS_240 flag The usb-storage driver sets max_sectors = 240 in its scsi-host template, for uas we do not want to do that for all devices, but testing has shown that some devices need it. This commit adds a US_FL_MAX_SECTORS_240 flag for such devices, and implements support for it in uas.c, while at it it also adds support for US_FL_MAX_SECTORS_64 to uas.c. Cc: stable@vger.kernel.org # 3.16 Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index a7f2604c5f25..7f5f78bd15ad 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -77,6 +77,8 @@ /* Cannot handle ATA_12 or ATA_16 CDBs */ \ US_FLAG(NO_REPORT_OPCODES, 0x04000000) \ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ + US_FLAG(MAX_SECTORS_240, 0x08000000) \ + /* Sets max_sectors to 240 */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From b00f5c2dc01450bed9fed1a41a637fa917e03c5c Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Fri, 10 Apr 2015 15:13:05 +0200 Subject: tty: Re-add external interface for tty_set_termios() This is needed by Bluetooth hci_uart module to be able to change speed of Bluetooth controller and local UART. Signed-off-by: Frederic Danis Reviewed-by: Peter Hurley Cc: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 358a337af598..fe5623c9af71 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -491,6 +491,7 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old); extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); +extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); -- cgit v1.2.3 From 591fc116f3302da915bb57d4474a61a5e8884cec Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Thu, 9 Apr 2015 11:34:22 +0300 Subject: usb: phy: msm: Use extcon framework for VBUS and ID detection On recent Qualcomm platforms VBUS and ID lines are not routed to USB PHY LINK controller. Use extcon framework to receive connect and disconnect ID and VBUS notification. Signed-off-by: Ivan T. Ivanov Signed-off-by: Felipe Balbi --- include/linux/usb/msm_hsusb.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 7dbecf9a4656..c4d956e50d09 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -18,6 +18,7 @@ #ifndef __ASM_ARCH_MSM_HSUSB_H #define __ASM_ARCH_MSM_HSUSB_H +#include #include #include #include @@ -119,6 +120,17 @@ struct msm_otg_platform_data { void (*setup_gpio)(enum usb_otg_state state); }; +/** + * struct msm_usb_cable - structure for exteternal connector cable + * state tracking + * @nb: hold event notification callback + * @conn: used for notification registration + */ +struct msm_usb_cable { + struct notifier_block nb; + struct extcon_specific_cable_nb conn; +}; + /** * struct msm_otg: OTG driver data. Shared by HCD and DCD. * @otg: USB OTG Transceiver structure. @@ -138,6 +150,8 @@ struct msm_otg_platform_data { * @chg_type: The type of charger attached. * @dcd_retires: The retry count used to track Data contact * detection process. + * @vbus: VBUS signal state trakining, using extcon framework + * @id: ID signal state trakining, using extcon framework */ struct msm_otg { struct usb_phy phy; @@ -166,6 +180,9 @@ struct msm_otg { struct reset_control *phy_rst; struct reset_control *link_rst; int vdd_levels[3]; + + struct msm_usb_cable vbus; + struct msm_usb_cable id; }; #endif -- cgit v1.2.3 From 44e42ae3a398b559c768b9b3c324d72b0b0b4479 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Thu, 9 Apr 2015 11:34:33 +0300 Subject: usb: phy: msm: Manual PHY and LINK controller VBUS change notification VBUS is not routed to USB PHY on recent Qualcomm platforms. USB controller must see VBUS in order to pull-up DP when setting RS bit. Henc configure USB PHY and LINK registers sense VBUS and enable manual pullup on D+ line. Cc: Vamsi Krishna Cc: Mayank Rana Signed-off-by: Ivan T. Ivanov Signed-off-by: Felipe Balbi --- include/linux/usb/msm_hsusb.h | 5 +++++ include/linux/usb/msm_hsusb_hw.h | 9 +++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index c4d956e50d09..e55a1504266e 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -150,6 +150,9 @@ struct msm_usb_cable { * @chg_type: The type of charger attached. * @dcd_retires: The retry count used to track Data contact * detection process. + * @manual_pullup: true if VBUS is not routed to USB controller/phy + * and controller driver therefore enables pull-up explicitly before + * starting controller using usbcmd run/stop bit. * @vbus: VBUS signal state trakining, using extcon framework * @id: ID signal state trakining, using extcon framework */ @@ -181,6 +184,8 @@ struct msm_otg { struct reset_control *link_rst; int vdd_levels[3]; + bool manual_pullup; + struct msm_usb_cable vbus; struct msm_usb_cable id; }; diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h index a29f6030afb1..e159b39f67a2 100644 --- a/include/linux/usb/msm_hsusb_hw.h +++ b/include/linux/usb/msm_hsusb_hw.h @@ -21,6 +21,8 @@ #define USB_AHBBURST (MSM_USB_BASE + 0x0090) #define USB_AHBMODE (MSM_USB_BASE + 0x0098) +#define USB_GENCONFIG_2 (MSM_USB_BASE + 0x00a0) + #define USB_CAPLENGTH (MSM_USB_BASE + 0x0100) /* 8 bit */ #define USB_USBCMD (MSM_USB_BASE + 0x0140) @@ -30,6 +32,9 @@ #define USB_PHY_CTRL (MSM_USB_BASE + 0x0240) #define USB_PHY_CTRL2 (MSM_USB_BASE + 0x0278) +#define GENCONFIG_2_SESS_VLD_CTRL_EN BIT(7) +#define USBCMD_SESS_VLD_CTRL BIT(25) + #define USBCMD_RESET 2 #define USB_USBINTR (MSM_USB_BASE + 0x0148) @@ -50,6 +55,10 @@ #define ULPI_PWR_CLK_MNG_REG 0x88 #define OTG_COMP_DISABLE BIT(0) +#define ULPI_MISC_A 0x96 +#define ULPI_MISC_A_VBUSVLDEXTSEL BIT(1) +#define ULPI_MISC_A_VBUSVLDEXT BIT(0) + #define ASYNC_INTR_CTRL (1 << 29) /* Enable async interrupt */ #define ULPI_STP_CTRL (1 << 30) /* Block communication with PHY */ #define PHY_RETEN (1 << 1) /* PHY retention enable/disable */ -- cgit v1.2.3 From b189a2117223edbe40e0a187ae5c606cbdd6447c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 28 Apr 2015 14:04:07 +0200 Subject: usb: phy: Remove the phy-rcar-gen2-usb driver The phy-rcar-gen2-usb driver, which supports legacy platform data only, is no longer used since commit a483dcbfa21f919c ("ARM: shmobile: lager: Remove legacy board support"). This driver was superseded by the DT-only phy-rcar-gen2 driver, which was introduced in commit 1233f59f745b237d ("phy: Renesas R-Car Gen2 PHY driver"). Signed-off-by: Geert Uytterhoeven Signed-off-by: Felipe Balbi --- include/linux/platform_data/usb-rcar-gen2-phy.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/platform_data/usb-rcar-gen2-phy.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h deleted file mode 100644 index dd3ba46c0d90..000000000000 --- a/include/linux/platform_data/usb-rcar-gen2-phy.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2013 Renesas Solutions Corp. - * Copyright (C) 2013 Cogent Embedded, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __USB_RCAR_GEN2_PHY_H -#define __USB_RCAR_GEN2_PHY_H - -#include - -struct rcar_gen2_phy_platform_data { - /* USB channel 0 configuration */ - bool chan0_pci:1; /* true: PCI USB host 0, false: USBHS */ - /* USB channel 2 configuration */ - bool chan2_pci:1; /* true: PCI USB host 2, false: USBSS */ -}; - -#endif -- cgit v1.2.3 From 46c264daaaa569e24f8aba877d0fd8167c42a9a4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 28 Apr 2015 18:33:49 +0200 Subject: bridge/nl: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent. In fact, it is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: e5a55a898720 ("net: create generic bridge ops") Fixes: 815cccbf10b2 ("ixgbe: add setlink, getlink support to ixgbe and ixgbevf") CC: John Fastabend CC: Sathya Perla CC: Subbu Seetharaman CC: Ajit Khaparde CC: Jeff Kirsher CC: intel-wired-lan@lists.osuosl.org CC: Jiri Pirko CC: Scott Feldman CC: Stephen Hemminger CC: bridge@lists.linux-foundation.org Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- include/linux/rtnetlink.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dbad4d728b4b..1899c74a7127 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,7 +977,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - * struct net_device *dev, u32 filter_mask) + * struct net_device *dev, u32 filter_mask, + * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * @@ -1173,7 +1174,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 filter_mask); + u32 filter_mask, + int nlflags); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2da5d1081ad9..7b8e260c4a27 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -122,5 +122,5 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask); + u32 flags, u32 mask, int nlflags); #endif /* __LINUX_RTNETLINK_H */ -- cgit v1.2.3 From 0df48c26d8418c5c9fba63fac15b660d70ca2f1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 15:28:17 -0700 Subject: tcp: add tcpi_bytes_acked to tcp_info This patch tracks total number of bytes acked for a TCP socket. This is the sum of all changes done to tp->snd_una, and allows for precise tracking of delivered data. RFC4898 named this : tcpEStatsAppHCThruOctetsAcked This is a 64bit field, and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->bytes_acked was placed near tp->snd_una for best data locality and minimal performance impact. Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Cc: Matt Mathis Cc: Eric Salo Cc: Martin Lau Cc: Chris Rapier Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0caa3a2d4106..0f73b43171da 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -150,6 +150,10 @@ struct tcp_sock { u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ + u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ -- cgit v1.2.3 From bdd1f9edacb5f5835d1e6276571bbbe5b88ded48 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2015 15:28:18 -0700 Subject: tcp: add tcpi_bytes_received to tcp_info This patch tracks total number of payload bytes received on a TCP socket. This is the sum of all changes done to tp->rcv_nxt RFC4898 named this : tcpEStatsAppHCThruOctetsReceived This is a 64bit field, and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->bytes_received was placed near tp->rcv_nxt for best data locality and minimal performance impact. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Matt Mathis Cc: Eric Salo Cc: Martin Lau Cc: Chris Rapier Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0f73b43171da..3b2911502a8c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -145,6 +145,10 @@ struct tcp_sock { * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. */ + u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ -- cgit v1.2.3 From f1ec7187167ce225d2744b20a90afef5f10fd6cd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 29 Apr 2015 16:02:30 -0500 Subject: libfdt: add fdt type definitions In preparation for libfdt/dtc update, add the new fdt specific types. Signed-off-by: Rob Herring Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org --- include/linux/libfdt_env.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 01508c7b8c81..2a663c6bb428 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -5,6 +5,10 @@ #include +typedef __be16 fdt16_t; +typedef __be32 fdt32_t; +typedef __be64 fdt64_t; + #define fdt32_to_cpu(x) be32_to_cpu(x) #define cpu_to_fdt32(x) cpu_to_be32(x) #define fdt64_to_cpu(x) be64_to_cpu(x) -- cgit v1.2.3 From 042f7df15a4fff8eec42873f755aea848dcdedd1 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 30 Apr 2015 17:16:12 +0800 Subject: workqueue: Allow modifying low level unbound workqueue cpumask Allow to modify the low-level unbound workqueues cpumask through sysfs. This is performed by traversing the entire workqueue list and calling apply_wqattrs_prepare() on the unbound workqueues with the new low level mask. Only after all the preparation are done, we commit them all together. Ordered workqueues are ignored from the low level unbound workqueue cpumask, it will be handled in near future. All the (default & per-node) pwqs are mandatorily controlled by the low level cpumask. If the user configured cpumask doesn't overlap with the low level cpumask, the low level cpumask will be used for the wq instead. The comment of wq_calc_node_cpumask() is updated and explicitly requires that its first argument should be the attrs of the default pwq. The default wq_unbound_cpumask is cpu_possible_mask. The workqueue subsystem doesn't know its best default value, let the system manager or the other subsystem set it when needed. Changed from V8: merge the calculating code for the attrs of the default pwq together. minor change the code&comments for saving the user configured attrs. remove unnecessary list_del(). minor update the comment of wq_calc_node_cpumask(). update the comment of workqueue_set_unbound_cpumask(); Cc: Christoph Lameter Cc: Kevin Hilman Cc: Lai Jiangshan Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Tejun Heo Cc: Viresh Kumar Cc: Frederic Weisbecker Original-patch-by: Frederic Weisbecker Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index deee212af8e0..4618dd672d1b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -424,6 +424,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); +int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -- cgit v1.2.3 From 0bb549052d33f8992544764a6cf1299d06ba7e2f Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Tue, 28 Apr 2015 18:44:31 -0400 Subject: efi: Add esrt support Add sysfs files for the EFI System Resource Table (ESRT) under /sys/firmware/efi/esrt and for each EFI System Resource Entry under entries/ as a subdir. The EFI System Resource Table (ESRT) provides a read-only catalog of system components for which the system accepts firmware upgrades via UEFI's "Capsule Update" feature. This module allows userland utilities to evaluate what firmware updates can be applied to this system, and potentially arrange for those updates to occur. The ESRT is described as part of the UEFI specification, in version 2.5 which should be available from http://uefi.org/specifications in early 2015. If you're a member of the UEFI Forum, information about its addition to the standard is available as UEFI Mantis 1090. For some hardware platforms, additional restrictions may be found at http://msdn.microsoft.com/en-us/library/windows/hardware/jj128256.aspx , and additional documentation may be found at http://download.microsoft.com/download/5/F/5/5F5D16CD-2530-4289-8019-94C6A20BED3C/windows-uefi-firmware-update-platform.docx . Signed-off-by: Peter Jones Signed-off-by: Matt Fleming --- include/linux/efi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index af5be0368dec..024c27e7c0fa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -583,6 +583,9 @@ void efi_native_runtime_setup(void); #define EFI_FILE_INFO_ID \ EFI_GUID( 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) +#define EFI_SYSTEM_RESOURCE_TABLE_GUID \ + EFI_GUID( 0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80 ) + #define EFI_FILE_SYSTEM_GUID \ EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) @@ -823,6 +826,7 @@ extern struct efi { unsigned long fw_vendor; /* fw_vendor */ unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ + unsigned long esrt; /* ESRT table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; @@ -875,6 +879,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +extern void __init efi_esrt_init(void); extern int efi_config_parse_tables(void *config_tables, int count, int sz, efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); @@ -882,12 +887,15 @@ extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); +extern u64 efi_mem_desc_end(efi_memory_desc_t *md); +extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; +extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); -- cgit v1.2.3 From fb7737e949e31d8a71acee6bbb670f32dbd2a2c0 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:14 -0600 Subject: hwspinlock/core: add device tree support This patch adds a new OF-friendly API of_hwspin_lock_get_id() for hwspinlock clients to use/request locks from a hwspinlock device instantiated through a device-tree blob. This new API can be used by hwspinlock clients to get the id for a specific lock using the phandle + args specifier, so that it can be requested using the available hwspin_lock_request_specific() API. Signed-off-by: Suman Anna Reviewed-by: Bjorn Andersson [small comment clarification] Signed-off-by: Ohad Ben-Cohen --- include/linux/hwspinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index 3343298e40e8..859d673d98c8 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -26,6 +26,7 @@ #define HWLOCK_IRQ 0x02 /* Disable interrupts, don't save state */ struct device; +struct device_node; struct hwspinlock; struct hwspinlock_device; struct hwspinlock_ops; @@ -66,6 +67,7 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank); struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); +int of_hwspin_lock_get_id(struct device_node *np, int index); int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); @@ -120,6 +122,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + return 0; +} + static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) { return 0; -- cgit v1.2.3 From d54385ce68cd18ab002b46f61246ad197cec92de Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 30 Apr 2015 14:53:54 -0700 Subject: etherdev: Process is_multicast_ether_addr at same size as other operations This change makes it so that we process the address in is_multicast_ether_addr at the same size as the other calls. This allows us to avoid duplicate reads when used with other calls such as is_zero_ether_addr or eth_addr_copy. In addition I have added a 64 bit version of the function so in eth_type_trans we can process the destination address as a 64 bit value throughout. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 606563ef8a72..c4a10f991fe0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr) */ static inline bool is_multicast_ether_addr(const u8 *addr) { - return 0x01 & addr[0]; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 a = *(const u32 *)addr; +#else + u16 a = *(const u16 *)addr; +#endif +#ifdef __BIG_ENDIAN + return 0x01 & (a >> ((sizeof(a) * 8) - 8)); +#else + return 0x01 & a; +#endif +} + +static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +#ifdef __BIG_ENDIAN + return 0x01 & ((*(const u64 *)addr) >> 56); +#else + return 0x01 & (*(const u64 *)addr); +#endif +#else + return is_multicast_ether_addr(addr); +#endif } /** -- cgit v1.2.3 From 50fb799289501c2eab9f43fc9af513027e1e994f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 May 2015 11:30:12 -0700 Subject: net: Add skb_get_hash_perturb This calls flow_disect and __skb_get_hash to procure a hash for a packet. Input includes a key to initialize jhash. This function does not set skb->hash. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..acb83e249e3f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -927,6 +927,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; -- cgit v1.2.3 From 7829fb09a2b4268b30dd9bc782fa5ebee278b137 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Apr 2015 04:13:52 +0200 Subject: lib: make memzero_explicit more robust against dead store elimination In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in case LTO would decide to inline memzero_explicit() and eventually find out it could be elimiated as dead store. While using barrier() works well for the case of gcc, recent efforts from LLVMLinux people suggest to use llvm as an alternative to gcc, and there, Stephan found in a simple stand-alone user space example that llvm could nevertheless optimize and thus elimitate the memset(). A similar issue has been observed in the referenced llvm bug report, which is regarded as not-a-bug. Based on some experiments, icc is a bit special on its own, while it doesn't seem to eliminate the memset(), it could do so with an own implementation, and then result in similar findings as with llvm. The fix in this patch now works for all three compilers (also tested with more aggressive optimization levels). Arguably, in the current kernel tree it's more of a theoretical issue, but imho, it's better to be pedantic about it. It's clearly visible with gcc/llvm though, with the below code: if we would have used barrier() only here, llvm would have omitted clearing, not so with barrier_data() variant: static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); barrier_data(s); } int main(void) { char buff[20]; memzero_explicit(buff, sizeof(buff)); return 0; } $ gcc -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x0000000000400400 <+0>: lea -0x28(%rsp),%rax 0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp) 0x000000000040040e <+14>: movq $0x0,-0x20(%rsp) 0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp) 0x000000000040041f <+31>: xor %eax,%eax 0x0000000000400421 <+33>: retq End of assembler dump. $ clang -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x00000000004004f0 <+0>: xorps %xmm0,%xmm0 0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp) 0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp) 0x0000000000400500 <+16>: lea -0x18(%rsp),%rax 0x0000000000400505 <+21>: xor %eax,%eax 0x0000000000400507 <+23>: retq End of assembler dump. As gcc, clang, but also icc defines __GNUC__, it's sufficient to define this in compiler-gcc.h only to be picked up. For a fallback or otherwise unsupported compiler, we define it as a barrier. Similarly, for ecc which does not support gcc inline asm. Reference: https://llvm.org/bugs/show_bug.cgi?id=15495 Reported-by: Stephan Mueller Tested-by: Stephan Mueller Signed-off-by: Daniel Borkmann Cc: Theodore Ts'o Cc: Stephan Mueller Cc: Hannes Frederic Sowa Cc: mancha security Cc: Mark Charlebois Cc: Behan Webster Signed-off-by: Herbert Xu --- include/linux/compiler-gcc.h | 16 +++++++++++++++- include/linux/compiler-intel.h | 3 +++ include/linux/compiler.h | 4 ++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cdf13ca7cac3..371e560d13cf 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -9,10 +9,24 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) - /* Optimization barrier */ + /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proofed that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") /* * This macro obfuscates arithmetic on a variable address so that gcc diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index ba147a1727e6..0c9a2f2c2802 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,9 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier_data(ptr) barrier() + #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0e41ca0e5927..867722591be2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -169,6 +169,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +#ifndef barrier_data +# define barrier_data(ptr) barrier() +#endif + /* Unreachable code */ #ifndef unreachable # define unreachable() do { } while (1) -- cgit v1.2.3 From 20f56758b04364c3c139edbffde8cfaf0307edee Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Tue, 28 Apr 2015 00:18:41 -0700 Subject: leds: unify the location of led-trigger API Part of led-trigger API was in the private drivers/leds/leds.h header. Move it to the include/linux/leds.h header to unify the API location and announce it as public. It has been already exported from led-triggers.c with EXPORT_SYMBOL_GPL macro. The no-op definitions are changed from macros to inline to match the style of the surrounding code. Signed-off-by: Jacek Anaszewski Cc: Richard Purdie Acked-by: Sakari Ailus Signed-off-by: Bryan Wu --- include/linux/leds.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 9a2b000094cf..b122eeafb5dc 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -12,6 +12,7 @@ #ifndef __LINUX_LEDS_H_INCLUDED #define __LINUX_LEDS_H_INCLUDED +#include #include #include #include @@ -222,6 +223,11 @@ struct led_trigger { struct list_head next_trig; }; +ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); +ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr, + char *buf); + /* Registration functions for complex triggers */ extern int led_trigger_register(struct led_trigger *trigger); extern void led_trigger_unregister(struct led_trigger *trigger); @@ -238,6 +244,16 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger, unsigned long *delay_on, unsigned long *delay_off, int invert); +extern void led_trigger_set_default(struct led_classdev *led_cdev); +extern void led_trigger_set(struct led_classdev *led_cdev, + struct led_trigger *trigger); +extern void led_trigger_remove(struct led_classdev *led_cdev); + +static inline void *led_get_trigger_data(struct led_classdev *led_cdev) +{ + return led_cdev->trigger_data; +} + /** * led_trigger_rename_static - rename a trigger * @name: the new trigger name @@ -267,6 +283,15 @@ static inline void led_trigger_register_simple(const char *name, static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} static inline void led_trigger_event(struct led_trigger *trigger, enum led_brightness event) {} +static inline void led_trigger_set_default(struct led_classdev *led_cdev) {} +static inline void led_trigger_set(struct led_classdev *led_cdev, + struct led_trigger *trigger) {} +static inline void led_trigger_remove(struct led_classdev *led_cdev) {} +static inline void *led_get_trigger_data(struct led_classdev *led_cdev) +{ + return NULL; +} + #endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific functions */ -- cgit v1.2.3 From 9afd85c9e4552b276e2f4cfefd622bdeeffbbf26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 2 May 2015 14:01:07 +0200 Subject: net: Export IGMP/MLD message validation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch, the IGMP and MLD message validation functions are moved from the bridge code to IPv4/IPv6 multicast files. Some small refactoring was done to enhance readibility and to iron out some differences in behaviour between the IGMP and MLD parsing code (e.g. the skb-cloning of MLD messages is now only done if necessary, just like the IGMP part always did). Finally, these IGMP and MLD message validation functions are exported so that not only the bridge can use it but batman-adv later, too. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 + include/linux/skbuff.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..193ad488d3e2 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acb83e249e3f..9c2f793573fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3419,6 +3419,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, -- cgit v1.2.3 From b2387ddcced8de3e6471a2fb16a409577063016f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 1 May 2015 09:59:44 -0700 Subject: blk-mq: fix FUA request hang When a FUA request enters its DATA stage of flush pipeline, the request is added to mq requeue list, the request will then be added to ctx->rq_list. blk_mq_attempt_merge() might merge the request with a bio. Later when the request is finished the flush pipeline, the request->__data_len is 0. Then I only saw the bio gets endio called, the original request never finish. Adding REQ_FLUSH_SEQ into REQ_NOMERGE_FLAGS looks an easy fix. stable: 3.15+ Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e35ea5f..b7299febc4b4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,7 +220,7 @@ enum rq_flag_bits { /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_THROTTLED (1ULL << __REQ_THROTTLED) -- cgit v1.2.3 From 6cd9e9f629f11b9412d4e9aa294c029dbb36b3cf Mon Sep 17 00:00:00 2001 From: Kapileshwar Singh Date: Wed, 18 Feb 2015 16:04:21 +0000 Subject: thermal: of: fix cooling device weights in device tree Currently you can specify the weight of the cooling device in the device tree but that information is not populated to the thermal_bind_params where the fair share governor expects it to be. The of thermal zone device doesn't have a thermal_bind_params structure and arguably it's better to pass the weight inside the thermal_instance as it is specific to the bind of a cooling device to a thermal zone parameter. Core thermal code is fixed to populate the weight in the instance from the thermal_bind_params, so platform code that was passing the weight inside the thermal_bind_params continue to work seamlessly. While we are at it, create a default value for the weight parameter for those thermal zones that currently don't define it and remove the hardcoded default in of-thermal. Cc: Zhang Rui Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Peter Feuerer Cc: Darren Hart Cc: Eduardo Valentin Cc: Kukjin Kim Cc: Durgadoss R Signed-off-by: Kapileshwar Singh Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5eac316490ea..00dacd4dfdce 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,6 +40,9 @@ /* No upper/lower limit requirement */ #define THERMAL_NO_LIMIT ((u32)~0) +/* Default weight of a bound cooling device */ +#define THERMAL_WEIGHT_DEFAULT 0 + /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) @@ -323,7 +326,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, - unsigned long, unsigned long); + unsigned long, unsigned long, + unsigned int); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *); -- cgit v1.2.3 From bcdcbbc71125c37195f97314f453ca9a3a4eb758 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Wed, 18 Feb 2015 16:04:25 +0000 Subject: thermal: fair_share: generalize the weight concept The fair share governor has the concept of weights, which is the influence of each cooling device in a thermal zone. The current implementation forces the weights of all cooling devices in a thermal zone to add up to a 100. This complicates setups, as you need to know in advance how many cooling devices you are going to have. If you bind a new cooling device, you have to modify all the other cooling devices weights, which is error prone. Furthermore, you can't specify a "default" weight for platforms since that default value depends on the number of cooling devices in the platform. This patch generalizes the concept of weight by allowing any number to be a "weight". Weights are now relative to each other. Platforms that don't specify weights get the same default value for all their cooling devices, so all their cdevs are considered to be equally influential. It's important to note that previous users of the weights don't need to alter the code: percentages continue to work as they used to. This patch just removes the constraint of all the weights in a thermal zone having to add up to a 100. If they do, you get the same behavior as before. If they don't, fair share now works for that platform. Cc: Zhang Rui Cc: Eduardo Valentin Cc: Durgadoss R Acked-by: Durgadoss R Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 00dacd4dfdce..bac0f52c7a1e 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -217,9 +217,12 @@ struct thermal_bind_params { /* * This is a measure of 'how effectively these devices can - * cool 'this' thermal zone. The shall be determined by platform - * characterization. This is on a 'percentage' scale. - * See Documentation/thermal/sysfs-api.txt for more information. + * cool 'this' thermal zone. It shall be determined by + * platform characterization. This value is relative to the + * rest of the weights so a cooling device whose weight is + * double that of another cooling device is twice as + * effective. See Documentation/thermal/sysfs-api.txt for more + * information. */ int weight; -- cgit v1.2.3 From e33df1d2f3a0141cd79e770f31999ba0dd7ebfa8 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Thu, 26 Feb 2015 19:00:27 +0000 Subject: thermal: let governors have private data for each thermal zone A governor may need to store its current state between calls to throttle(). That state depends on the thermal zone, so store it as private data in struct thermal_zone_device. The governors may have two new ops: bind_to_tz() and unbind_from_tz(). When provided, these functions let governors do some initialization and teardown when they are bound/unbound to a tz and possibly store that information in the governor_data field of the struct thermal_zone_device. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index bac0f52c7a1e..edf9d53c67e6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -165,6 +165,7 @@ struct thermal_attr { * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone + * @governor_data: private pointer for governor data * @thermal_instances: list of &struct thermal_instance of this thermal zone * @idr: &struct idr to generate unique id for this zone's cooling * devices @@ -191,6 +192,7 @@ struct thermal_zone_device { struct thermal_zone_device_ops *ops; const struct thermal_zone_params *tzp; struct thermal_governor *governor; + void *governor_data; struct list_head thermal_instances; struct idr idr; struct mutex lock; @@ -201,12 +203,19 @@ struct thermal_zone_device { /** * struct thermal_governor - structure that holds thermal governor information * @name: name of the governor + * @bind_to_tz: callback called when binding to a thermal zone. If it + * returns 0, the governor is bound to the thermal zone, + * otherwise it fails. + * @unbind_from_tz: callback called when a governor is unbound from a + * thermal zone. * @throttle: callback called for every trip point even if temperature is * below the trip point temperature * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; + int (*bind_to_tz)(struct thermal_zone_device *tz); + void (*unbind_from_tz)(struct thermal_zone_device *tz); int (*throttle)(struct thermal_zone_device *tz, int trip); struct list_head governor_list; }; -- cgit v1.2.3 From 35b11d2e3a66279a477e36cefb2603806295b8ce Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Thu, 26 Feb 2015 19:00:28 +0000 Subject: thermal: extend the cooling device API to include power information Add three optional callbacks to the cooling device interface to allow them to express power. In addition to the callbacks, add helpers to identify cooling devices that implement the power cooling device API. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index edf9d53c67e6..bf3c55f405c2 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -63,6 +63,7 @@ struct thermal_zone_device; struct thermal_cooling_device; +struct thermal_instance; enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, @@ -116,6 +117,12 @@ struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); + int (*get_requested_power)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32 *); + int (*state2power)(struct thermal_cooling_device *, + struct thermal_zone_device *, unsigned long, u32 *); + int (*power2state)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32, unsigned long *); }; struct thermal_cooling_device { @@ -331,6 +338,16 @@ void thermal_zone_of_sensor_unregister(struct device *dev, #endif #if IS_ENABLED(CONFIG_THERMAL) +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ + return cdev->ops->get_requested_power && cdev->ops->state2power && + cdev->ops->power2state; +} + +int power_actor_get_max_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *max_power); +int power_actor_set_power(struct thermal_cooling_device *, + struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, const struct thermal_zone_params *, int, int); @@ -359,6 +376,14 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ return false; } +static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *max_power) +{ return 0; } +static inline int power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *tz, u32 power) +{ return 0; } static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, -- cgit v1.2.3 From c36cf07176316fbe6a4bdbc23afcb0cbf7822bf2 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Thu, 26 Feb 2015 19:00:29 +0000 Subject: thermal: cpu_cooling: implement the power cooling device API Add a basic power model to the cpu cooling device to implement the power cooling device API. The power model uses the current frequency, current load and OPPs for the power calculations. The cpus must have registered their OPPs using the OPP library. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Kapileshwar Singh Signed-off-by: Punit Agrawal Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/cpu_cooling.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index bd955270d5aa..c156f5082758 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,6 +28,9 @@ #include #include +typedef int (*get_static_t)(cpumask_t *cpumask, int interval, + unsigned long voltage, u32 *power); + #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. @@ -36,6 +39,10 @@ struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus); +struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func); + /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. @@ -45,6 +52,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus); + +struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, @@ -52,6 +65,15 @@ of_cpufreq_cooling_register(struct device_node *np, { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} #endif /** @@ -67,12 +89,29 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus) { return ERR_PTR(-ENOSYS); } +static inline struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func) +{ + return NULL; +} + static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus) { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} + static inline void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { -- cgit v1.2.3 From 6b775e870c56c59c3e16531ea2307b797395f9f7 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 2 Mar 2015 17:17:19 +0000 Subject: thermal: introduce the Power Allocator governor The power allocator governor is a thermal governor that controls system and device power allocation to control temperature. Conceptually, the implementation divides the sustainable power of a thermal zone among all the heat sources in that zone. This governor relies on "power actors", entities that represent heat sources. They can report current and maximum power consumption and can set a given maximum power consumption, usually via a cooling device. The governor uses a Proportional Integral Derivative (PID) controller driven by the temperature of the thermal zone. The output of the controller is a power budget that is then allocated to each power actor that can have bearing on the temperature we are trying to control. It decides how much power to give each cooling device based on the performance they are requesting. The PID controller ensures that the total power budget does not exceed the control temperature. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Punit Agrawal Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index bf3c55f405c2..6bbe11c97cea 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -59,6 +59,8 @@ #define DEFAULT_THERMAL_GOVERNOR "fair_share" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) #define DEFAULT_THERMAL_GOVERNOR "user_space" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) +#define DEFAULT_THERMAL_GOVERNOR "power_allocator" #endif struct thermal_zone_device; @@ -154,8 +156,7 @@ struct thermal_attr { * @devdata: private pointer for device private data * @trips: number of trip points the thermal zone supports * @passive_delay: number of milliseconds to wait between polls when - * performing passive cooling. Currenty only used by the - * step-wise governor + * performing passive cooling. * @polling_delay: number of milliseconds to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) @@ -165,7 +166,6 @@ struct thermal_attr { * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. - * Currenty only used by the step-wise governor. * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. @@ -197,7 +197,7 @@ struct thermal_zone_device { int passive; unsigned int forced_passive; struct thermal_zone_device_ops *ops; - const struct thermal_zone_params *tzp; + struct thermal_zone_params *tzp; struct thermal_governor *governor; void *governor_data; struct list_head thermal_instances; @@ -275,6 +275,33 @@ struct thermal_zone_params { int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; + + /* + * Sustainable power (heat) that this thermal zone can dissipate in + * mW + */ + u32 sustainable_power; + + /* + * Proportional parameter of the PID controller when + * overshooting (i.e., when temperature is below the target) + */ + s32 k_po; + + /* + * Proportional parameter of the PID controller when + * undershooting + */ + s32 k_pu; + + /* Integral parameter of the PID controller */ + s32 k_i; + + /* Derivative parameter of the PID controller */ + s32 k_d; + + /* threshold below which the error is no longer accumulated */ + s32 integral_cutoff; }; struct thermal_genl_event { @@ -350,7 +377,7 @@ int power_actor_set_power(struct thermal_cooling_device *, struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); + struct thermal_zone_params *, int, int); void thermal_zone_device_unregister(struct thermal_zone_device *); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, -- cgit v1.2.3 From f31105347cc56c13d552b844ada04418769d875d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 28 Apr 2015 12:17:50 +0200 Subject: irqchip: irqc: Remove platform data support As of commit 914d7d148411997c ("ARM: shmobile: r8a73a4: Remove legacy code"), the Renesas R-Mobile/R-Car interrupt controller is used with DT only, and interrupt numbers are thus always assigned automatically. Drop the platform data declaration and all related support code. Signed-off-by: Geert Uytterhoeven Cc: Magnus Damm Cc: Jason Cooper Link: http://lkml.kernel.org/r/1430216270-31929-1-git-send-email-geert%2Brenesas@glider.be Signed-off-by: Thomas Gleixner --- include/linux/platform_data/irq-renesas-irqc.h | 27 -------------------------- 1 file changed, 27 deletions(-) delete mode 100644 include/linux/platform_data/irq-renesas-irqc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/irq-renesas-irqc.h b/include/linux/platform_data/irq-renesas-irqc.h deleted file mode 100644 index 3ae17b3e00ed..000000000000 --- a/include/linux/platform_data/irq-renesas-irqc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Renesas IRQC Driver - * - * Copyright (C) 2013 Magnus Damm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __IRQ_RENESAS_IRQC_H__ -#define __IRQ_RENESAS_IRQC_H__ - -struct renesas_irqc_config { - unsigned int irq_base; -}; - -#endif /* __IRQ_RENESAS_IRQC_H__ */ -- cgit v1.2.3 From 406c057c4e00744453d5b0731eb23629ec14dcdf Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 4 May 2015 21:54:20 -0400 Subject: libata: READ LOG DMA EXT support can be in either page 119 or 120 Support for the READ/WRITE LOG DMA EXT commands can be signaled either in page 119 or page 120. We should check both pages. Signed-off-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/ata.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index b666b773e111..fed36418dd1c 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -704,9 +704,19 @@ static inline bool ata_id_wcache_enabled(const u16 *id) static inline bool ata_id_has_read_log_dma_ext(const u16 *id) { + /* Word 86 must have bit 15 set */ if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 3); + + /* READ LOG DMA EXT support can be signaled either from word 119 + * or from word 120. The format is the same for both words: Bit + * 15 must be cleared, bit 14 set and bit 3 set. + */ + if ((id[ATA_ID_COMMAND_SET_3] & 0xC008) == 0x4008 || + (id[ATA_ID_COMMAND_SET_4] & 0xC008) == 0x4008) + return true; + + return false; } static inline bool ata_id_has_sense_reporting(const u16 *id) -- cgit v1.2.3 From 5d3abf8ff67f49271a42c0f7fa4f20f9e046bf0e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 4 May 2015 21:54:21 -0400 Subject: libata: Fall back to unqueued READ LOG EXT if the DMA variant fails Some devices advertise support for the READ/WRITE LOG DMA EXT commands but fail when we try to issue them. This can lead to queued TRIM being unintentionally disabled since the relevant feature flag is located in a general purpose log page. Fall back to unqueued READ LOG EXT if the DMA variant fails while reading a log page. Signed-off-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8dad4a307bb8..c3ef58014b33 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -424,6 +424,7 @@ enum { ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ + ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From c4cf5261f8bffd9de132b50660a69148e7575bd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:15:18 -0600 Subject: bio: skip atomic inc/dec of ->bi_remaining for non-chains Struct bio has an atomic ref count for chained bio's, and we use this to know when to end IO on the bio. However, most bio's are not chained, so we don't need to always introduce this atomic operation as part of ending IO. Add a helper to elevate the bi_remaining count, and flag the bio as now actually needing the decrement at end_io time. Rename the field to __bi_remaining to catch any current users of this doing the incrementing manually. For high IOPS workloads, this reduces the overhead of bio_endio() substantially. Tested-by: Robert Elliott Acked-by: Kent Overstreet Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ include/linux/blk_types.h | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index da3a127c9958..8bfe9eee6d1a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -644,6 +644,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e35ea5f..8b07e0603887 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,7 +65,7 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t bi_remaining; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; @@ -122,6 +122,7 @@ struct bio { #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3 From dac56212e8127dbc0bff7be35c508bc280213309 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:23:59 -0600 Subject: bio: skip atomic inc/dec of ->bi_cnt for most use cases Struct bio has a reference count that controls when it can be freed. Most uses cases is allocating the bio, which then returns with a single reference to it, doing IO, and then dropping that single reference. We can remove this atomic_dec_and_test() in the completion path, if nobody else is holding a reference to the bio. If someone does call bio_get() on the bio, then we flag the bio as now having valid count and that we must properly honor the reference count when it's being put. Tested-by: Robert Elliott Signed-off-by: Jens Axboe --- include/linux/bio.h | 16 +++++++++++++++- include/linux/blk_types.h | 3 ++- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8bfe9eee6d1a..7486ea103f6e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio) * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ -#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline void bio_cnt_set(struct bio *bio, unsigned int count) +{ + if (count != 1) { + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + } + atomic_set(&bio->__bi_cnt, count); +} enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8b07e0603887..93d2e7153816 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -92,7 +92,7 @@ struct bio { unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ - atomic_t bi_cnt; /* pin count */ + atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -123,6 +123,7 @@ struct bio { #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ #define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3 From 84be456f883c4685680fba8e5154b5f72e92957e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 12:46:15 +0200 Subject: remove We don't have any arch specific scatterlist now that parisc switched over to the generic one. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- include/linux/dmapool.h | 2 +- include/linux/scatterlist.h | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..504af1e65ce1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,8 +22,7 @@ #include #include #include - -#include +#include struct module; struct scsi_ioctl_command; diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 52456aa566a0..e1043f79122f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,8 +11,8 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include #include -#include struct device; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index ed8f9e70df9b..eca1ec93775c 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -2,13 +2,39 @@ #define _LINUX_SCATTERLIST_H #include +#include #include #include - -#include -#include #include +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + unsigned int dma_length; +#endif +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries dma_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) + +#ifdef CONFIG_NEED_SG_DMA_LENGTH +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif + struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ @@ -18,10 +44,9 @@ struct sg_table { /* * Notes on SG table design. * - * Architectures must provide an unsigned long page_link field in the - * scatterlist struct. We use that to place the page pointer AND encode - * information about the sg table as well. The two lower bits are reserved - * for this information. + * We use the unsigned long page_link field in the scatterlist struct to place + * the page pointer AND encode information about the sg table as well. The two + * lower bits are reserved for this information. * * If bit 0 is set, then the page_link contains a pointer to the next sg * table list. Otherwise the next entry is at sg + 1. -- cgit v1.2.3 From 4f8c9510ba71bb54477841bebb90154ef140860f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:16 +0200 Subject: block: rename REQ_TYPE_SPECIAL to REQ_TYPE_DRV_PRIV Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..98c90272443b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,10 +79,10 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_SPECIAL, /* driver defined type */ + REQ_TYPE_DRV_PRIV, /* driver defined type */ /* * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver + * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver * private REQ_LB opcodes to differentiate what type of request this is */ REQ_TYPE_ATA_TASKFILE, -- cgit v1.2.3 From b42171ef7d938a66fa52e66a3d911ed63770b5ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:17 +0200 Subject: block: move REQ_TYPE_ATA_TASKFILE and REQ_TYPE_ATA_PC to ide.h These values are only used by the IDE driver, so move them into it by allowing drivers to take cmd_type values after the first private one. Note that we have to turn cmd_type into a plain unsigned integer so that gcc doesn't complain about mismatching enum types. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 ++--------- include/linux/ide.h | 7 +++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98c90272443b..9cb4d80a4987 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,14 +79,7 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_DRV_PRIV, /* driver defined type */ - /* - * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver - * private REQ_LB opcodes to differentiate what type of request this is - */ - REQ_TYPE_ATA_TASKFILE, - REQ_TYPE_ATA_PC, + REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; #define BLK_MAX_CDB 16 @@ -108,7 +101,7 @@ struct request { struct blk_mq_ctx *mq_ctx; u64 cmd_flags; - enum rq_cmd_type_bits cmd_type; + unsigned cmd_type; unsigned long atomic_flags; int cpu; diff --git a/include/linux/ide.h b/include/linux/ide.h index 93b5ca754b5b..62ac399144a6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -39,6 +39,12 @@ struct device; +/* IDE-specific values for req->cmd_type */ +enum ata_cmd_type_bits { + REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, + REQ_TYPE_ATA_PC, +}; + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1551,4 +1557,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data) #define ide_host_for_each_port(i, port, host) \ for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ -- cgit v1.2.3 From b0b93b48a30e809240ddd7449a6ad60a5ddf7b4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:18 +0200 Subject: block: move REQ_TYPE_SENSE to the ide driver Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - include/linux/ide.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cb4d80a4987..6076b9e18dcb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -75,7 +75,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_SENSE, /* sense request */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 62ac399144a6..9856b7d455d9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -43,6 +43,7 @@ struct device; enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, + REQ_TYPE_ATA_SENSE, /* sense request */ }; /* Error codes returned in rq->errors to the higher part of the driver. */ -- cgit v1.2.3 From ac7cdff00a33d48d27217560fa3b16d802e5f535 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:19 +0200 Subject: block: remove REQ_TYPE_PM_SHUTDOWN Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6076b9e18dcb..c2829ba5e738 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -77,7 +77,6 @@ enum rq_cmd_type_bits { REQ_TYPE_BLOCK_PC, /* scsi command */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ - REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; -- cgit v1.2.3 From a7928c1578c550bd6f4dec62d65132e6db226c57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:20 +0200 Subject: block: move PM request support to IDE This removes the request types and hacks from the block code and into the old IDE driver. There is a small amunt of code duplication due to this, but it's not too bad. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +-------------------- include/linux/ide.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c2829ba5e738..2da818a48097 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,7 +30,6 @@ struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; @@ -75,8 +74,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_PM_SUSPEND, /* suspend request */ - REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; @@ -207,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req) return req->ioprio; } -/* - * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME - * requests. Some step values could eventually be made generic. - */ -struct request_pm_state -{ - /* PM state machine step value, currently driver specific */ - int pm_step; - /* requested PM state value (S1, S2, S3, S4, ...) */ - u32 pm_state; - void* data; /* for driver use */ -}; - #include struct blk_queue_ctx; @@ -601,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) (((rq)->cmd_flags & REQ_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) -#define blk_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_PM_RESUME) - #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ @@ -838,6 +818,7 @@ extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); +extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, diff --git a/include/linux/ide.h b/include/linux/ide.h index 9856b7d455d9..a633898f36ac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -44,8 +44,14 @@ enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, REQ_TYPE_ATA_SENSE, /* sense request */ + REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ + REQ_TYPE_ATA_PM_RESUME, /* resume request */ }; +#define ata_pm_request(rq) \ + ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1321,6 +1327,19 @@ struct ide_port_info { u8 udma_mask; }; +/* + * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME + * requests. + */ +struct ide_pm_state { + /* PM state machine step value, currently driver specific */ + int pm_step; + /* requested PM state value (S1, S2, S3, S4, ...) */ + u32 pm_state; + void* data; /* for driver use */ +}; + + int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); -- cgit v1.2.3 From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 May 2015 21:34:46 -0700 Subject: tcp: provide SYN headers for passive connections This patch allows a server application to get the TCP SYN headers for its passive connections. This is useful if the server is doing fingerprinting of clients based on SYN packet contents. Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN. The first is used on a socket to enable saving the SYN headers for child connections. This can be set before or after the listen() call. The latter is used to retrieve the SYN headers for passive connections, if the parent listener has enabled TCP_SAVE_SYN. TCP_SAVED_SYN is read once, it frees the saved SYN headers. The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP headers. Original patch was written by Tom Herbert, I changed it to not hold a full skb (and associated dst and conntracking reference). We have used such patch for about 3 years at Google. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e6fb5df22db1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -199,6 +199,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -326,6 +327,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 2c7a88c252bf3381958cf716f31b6b2e0f2f3fa7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:33:48 -0700 Subject: etherdev: Fix sparse error, make test usable by other functions This change does two things. First it fixes a sparse error for the fact that the __be16 degrades to an integer. Since that is actually what I am kind of doing I am simply working around that by forcing both sides of the comparison to u16. Also I realized on some compilers I was generating another instruction for big endian systems such as PowerPC since it was masking the value before doing the comparison. So to resolve that I have simply pulled the mask out and wrapped it in an #ifndef __BIG_ENDIAN. Lastly I pulled this all out into its own function. I notices there are similar checks in a number of other places so this function can be reused there to help reduce overhead in these paths as well. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c4a10f991fe0..9012f8775208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -190,6 +190,24 @@ static inline bool is_valid_ether_addr(const u8 *addr) return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } +/** + * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol + * @proto: Ethertype/length value to be tested + * + * Check that the value from the Ethertype/length field is a valid Ethertype. + * + * Return true if the valid is an 802.3 supported Ethertype. + */ +static inline bool eth_proto_is_802_3(__be16 proto) +{ +#ifndef __BIG_ENDIAN + /* if CPU is little endian mask off bits representing LSB */ + proto &= htons(0xFF00); +#endif + /* cast both to u16 and compare since LSB can be ignored */ + return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); +} + /** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address -- cgit v1.2.3 From 9545b22da647cf6fbbac9c5a48c50fd72d892b11 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:34:10 -0700 Subject: vlan: Use eth_proto_is_802_3 Replace "ntohs(proto) >= ETH_P_802_3_MIN" w/ eth_proto_is_802_3(proto). Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 920e4457ce6e..b9ab677c0c0a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= ETH_P_802_3_MIN) { + if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } -- cgit v1.2.3 From 05836c378c7af9527b98a83746f32c7289a5f3c8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 5 May 2015 16:23:57 -0700 Subject: util_macros.h: have array pointer point to array of constants Using the new find_closest() macro can result in the following sparse warnings. drivers/hwmon/lm85.c:194:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:194:16: expected int *__fc_a drivers/hwmon/lm85.c:194:16: got int static const [toplevel] * drivers/hwmon/lm85.c:210:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:210:16: expected int *__fc_a drivers/hwmon/lm85.c:210:16: got int const *map This is because the array passed to find_closest() will typically be declared as array of constants, but the macro declares a non-constant pointer to it. Signed-off-by: Guenter Roeck Cc: Bartosz Golaszewski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/util_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index d5f4fb69dba3..f9b2ce58039b 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -5,7 +5,7 @@ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ typeof(x) __fc_x = (x); \ - typeof(*a) *__fc_a = (a); \ + typeof(*a) const *__fc_a = (a); \ for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ __fc_a[__fc_i + 1], 2)) \ -- cgit v1.2.3 From d8fd150fe3935e1692bf57c66691e17409ebb9c1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2015 16:24:00 -0700 Subject: nilfs2: fix sanity check of btree level in nilfs_btree_root_broken() The range check for b-tree level parameter in nilfs_btree_root_broken() is wrong; it accepts the case of "level == NILFS_BTREE_LEVEL_MAX" even though the level is limited to values in the range of 0 to (NILFS_BTREE_LEVEL_MAX - 1). Since the level parameter is read from storage device and used to index nilfs_btree_path array whose element count is NILFS_BTREE_LEVEL_MAX, it can cause memory overrun during btree operations if the boundary value is set to the level parameter on device. This fixes the broken sanity check and adds a comment to clarify that the upper bound NILFS_BTREE_LEVEL_MAX is exclusive. Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nilfs2_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index ff3fea3194c6..9abb763e4b86 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -460,7 +460,7 @@ struct nilfs_btree_node { /* level */ #define NILFS_BTREE_LEVEL_DATA 0 #define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 +#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ /** * struct nilfs_palloc_group_desc - block group descriptor -- cgit v1.2.3 From 2893c379461a208b3059f55dfe4dafa06b4aa46a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 31 Mar 2015 20:16:52 +0200 Subject: clk: make strings in parent name arrays const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clk functions and structs declare the parent_name arrays as 'const char **parent_names' which means the parent name strings are const, but the array itself is not. Use 'const char * const * parent_names' instead which also makes the array const. This allows us to put the parent_name arrays into the __initconst section. Signed-off-by: Sascha Hauer Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Acked-by: Uwe Kleine-König [sboyd@codeaurora.org: Squelch 80-character checkpatch warnings] Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index df695313f975..5378c2aba4d2 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -209,7 +209,7 @@ struct clk_ops { struct clk_init_data { const char *name; const struct clk_ops *ops; - const char **parent_names; + const char * const *parent_names; u8 num_parents; unsigned long flags; }; @@ -426,12 +426,14 @@ extern const struct clk_ops clk_mux_ops; extern const struct clk_ops clk_mux_ro_ops; struct clk *clk_register_mux(struct device *dev, const char *name, - const char **parent_names, u8 num_parents, unsigned long flags, + const char * const *parent_names, u8 num_parents, + unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock); struct clk *clk_register_mux_table(struct device *dev, const char *name, - const char **parent_names, u8 num_parents, unsigned long flags, + const char * const *parent_names, u8 num_parents, + unsigned long flags, void __iomem *reg, u8 shift, u32 mask, u8 clk_mux_flags, u32 *table, spinlock_t *lock); @@ -518,7 +520,7 @@ struct clk_composite { }; struct clk *clk_register_composite(struct device *dev, const char *name, - const char **parent_names, int num_parents, + const char * const *parent_names, int num_parents, struct clk_hw *mux_hw, const struct clk_ops *mux_ops, struct clk_hw *rate_hw, const struct clk_ops *rate_ops, struct clk_hw *gate_hw, const struct clk_ops *gate_ops, -- cgit v1.2.3 From d5622a9c13752be46e6fcde9d31391ce0bb0598b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 2 Mar 2015 15:45:41 +0000 Subject: clkdev: use clk_hw internally clk_add_alias() calls clk_get() followed by clk_put() but in between those two calls it saves away the struct clk pointer to a clk_lookup structure. This leaves the 'clk' member of the clk_lookup pointing at freed memory on configurations where CONFIG_COMMON_CLK=y. This is a problem because clk_get_sys() will eventually try to dereference the freed pointer by calling __clk_get_hw() on it. Fix this by saving away the struct clk_hw pointer instead of the struct clk pointer so that when we try to create a per-user struct clk in clk_get_sys() we don't dereference a junk pointer. Signed-off-by: Russell King --- include/linux/clkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 94bad77eeb4a..3003afad46c9 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -22,6 +22,7 @@ struct clk_lookup { const char *dev_id; const char *con_id; struct clk *clk; + struct clk_hw *clk_hw; }; #define CLKDEV_INIT(d, n, c) \ -- cgit v1.2.3 From d2d14a77886485310ec66e575f00ea5232ac7a14 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 14 Mar 2015 15:12:35 +0000 Subject: clk: update clk API documentation to clarify clk_round_rate() The idea is that rate = clk_round_rate(clk, r) is equivalent to: clk_set_rate(clk, r); rate = clk_get_rate(clk); except that clk_round_rate() does not change the hardware in any way. Signed-off-by: Russell King --- include/linux/clk.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 68c16a6bedb3..cafb22df8d00 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -306,6 +306,20 @@ void devm_clk_put(struct device *dev, struct clk *clk); * @clk: clock source * @rate: desired clock rate in Hz * + * This answers the question "if I were to pass @rate to clk_set_rate(), + * what clock rate would I end up with?" without changing the hardware + * in any way. In other words: + * + * rate = clk_round_rate(clk, r); + * + * and: + * + * clk_set_rate(clk, r); + * rate = clk_get_rate(clk); + * + * are equivalent except the former does not modify the clock hardware + * in any way. + * * Returns rounded clock rate in Hz, or negative errno. */ long clk_round_rate(struct clk *clk, unsigned long rate); -- cgit v1.2.3 From 2d34e507293102f29ee94d9a9c5b890696d42452 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Mar 2015 11:03:00 +0000 Subject: clkdev: get rid of redundant clk_add_alias() prototype in linux/clk.h clk_add_alias() is provided by clkdev, and is not part of the clk API. Howver, it is prototyped in two locations: linux/clkdev.h and linux/clk.h. This is a mess. Get rid of the redundant and unnecessary version in linux/clk.h. Acked-by: Tony Lindgren Tested-by: Robert Jarzmik Acked-by: Sekhar Nori Signed-off-by: Russell King --- include/linux/clk.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index cafb22df8d00..0df4a51e1a78 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -485,19 +485,6 @@ static inline void clk_disable_unprepare(struct clk *clk) clk_unprepare(clk); } -/** - * clk_add_alias - add a new clock alias - * @alias: name for clock alias - * @alias_dev_name: device name - * @id: platform specific clock name - * @dev: device - * - * Allows using generic clock names for drivers by adding a new alias. - * Assumes clkdev, see clkdev.h for more info. - */ -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev); - struct device_node; struct of_phandle_args; -- cgit v1.2.3 From b3d8d7e89fab374d731dfb46fe048f09766ca9c8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Mar 2015 10:43:04 +0000 Subject: clkdev: const-ify connection id to clk_add_alias() The connection id is only passed to clk_get() which is already const. Const-ify this argument too. Signed-off-by: Russell King --- include/linux/clkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index 3003afad46c9..cd93b215e3af 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -39,7 +39,7 @@ void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); void clkdev_add_table(struct clk_lookup *, size_t); -int clk_add_alias(const char *, const char *, char *, struct device *); +int clk_add_alias(const char *, const char *, const char *, struct device *); int clk_register_clkdev(struct clk *, const char *, const char *, ...); int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); -- cgit v1.2.3 From 2568999835d7797afce3dcc3a3f368051ffcaf1f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 2 Mar 2015 15:40:29 +0000 Subject: clkdev: add clkdev_create() helper Add a helper to allocate and add a clk_lookup structure. This can not only be used in several places in clkdev.c to simplify the code, but more importantly, can be used by callers of the clkdev code to simplify their clkdev creation and registration. Signed-off-by: Russell King --- include/linux/clkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index cd93b215e3af..a240b18e86fa 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -38,6 +38,9 @@ struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); +struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, + const char *dev_fmt, ...); + void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); -- cgit v1.2.3 From efb0de55b6a2ec15fc424e660601f22ae2fa487a Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Tue, 5 May 2015 15:04:18 +0530 Subject: pwm: Add support to remove registered consumer lookup tables In case some drivers are unloading, they can remove lookup tables which they had registered during their load time to avoid redundant entries if loaded again. CC: Samuel Ortiz Cc: Linus Walleij Cc: Alexandre Courbot Cc: Thierry Reding Signed-off-by: Shobhit Kumar Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e90628cac8fa..cfe2d8df5be0 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -290,10 +290,15 @@ struct pwm_lookup { #if IS_ENABLED(CONFIG_PWM) void pwm_add_table(struct pwm_lookup *table, size_t num); +void pwm_remove_table(struct pwm_lookup *table, size_t num); #else static inline void pwm_add_table(struct pwm_lookup *table, size_t num) { } + +static inline void pwm_remove_table(struct pwm_lookup *table, size_t num) +{ +} #endif #ifdef CONFIG_PWM_SYSFS -- cgit v1.2.3 From fa76a3db7093a527333c380df82a0f158d9b8299 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Thu, 9 Apr 2015 11:13:07 +0800 Subject: pinctrl: allow exlusive GPIO/mux pin allocation Disallow simultaneous use of the the GPIO and peripheral mux functions by setting a flag "strict" in struct pinctrl_desc. The blackfin pinmux and gpio controller doesn't allow user to set up a pin for both GPIO and peripheral function. So, add flag strict in struct pinctrl_desc to check both gpio_owner and mux_owner before approving the pin request. v2-changes: - if strict flag is set, check gpio_owner and mux_onwer in if and else clause v3-changes: - add kerneldoc for this struct - augment Documentation/pinctrl.txt Signed-off-by: Sonic Zhang Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 66e4697516de..fc6b0348c375 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -114,6 +114,8 @@ struct pinctrl_ops { * of the pins field above * @pctlops: pin control operation vtable, to support global concepts like * grouping of pins, this is optional. + * @strict: check both gpio_owner and mux_owner strictly before approving + the pin request * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver * @confops: pin config operations vtable, if you support pin configuration in * your driver @@ -132,6 +134,7 @@ struct pinctrl_desc { const struct pinctrl_ops *pctlops; const struct pinmux_ops *pmxops; const struct pinconf_ops *confops; + bool strict; struct module *owner; #ifdef CONFIG_GENERIC_PINCONF unsigned int num_custom_params; -- cgit v1.2.3 From 8c4c2016345feefcd289ce2479eb70286d30825a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 6 May 2015 14:19:13 +0200 Subject: pinctrl: move strict option to pinmux_ops While the pinmux_ops are ideally just a vtable for pin mux calls, the "strict" setting belongs so intuitively with the pin multiplexing that we should move it here anyway. Putting it in the top pinctrl_desc makes no sense. Cc: Sonic Zhang Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 3 --- include/linux/pinctrl/pinmux.h | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index fc6b0348c375..66e4697516de 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -114,8 +114,6 @@ struct pinctrl_ops { * of the pins field above * @pctlops: pin control operation vtable, to support global concepts like * grouping of pins, this is optional. - * @strict: check both gpio_owner and mux_owner strictly before approving - the pin request * @pmxops: pinmux operations vtable, if you support pinmuxing in your driver * @confops: pin config operations vtable, if you support pin configuration in * your driver @@ -134,7 +132,6 @@ struct pinctrl_desc { const struct pinctrl_ops *pctlops; const struct pinmux_ops *pmxops; const struct pinconf_ops *confops; - bool strict; struct module *owner; #ifdef CONFIG_GENERIC_PINCONF unsigned int num_custom_params; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index 511bda9ed4bf..d3740fa7073f 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -56,6 +56,9 @@ struct pinctrl_dev; * depending on whether the GPIO is configured as input or output, * a direction selector function may be implemented as a backing * to the GPIO controllers that need pin muxing. + * @strict: do not allow simultaneous use of the same pin for GPIO and another + * function. Check both gpio_owner and mux_owner strictly before approving + * the pin request. */ struct pinmux_ops { int (*request) (struct pinctrl_dev *pctldev, unsigned offset); @@ -79,6 +82,7 @@ struct pinmux_ops { struct pinctrl_gpio_range *range, unsigned offset, bool input); + bool strict; }; #endif /* CONFIG_PINMUX */ -- cgit v1.2.3 From cac089f9026e9ddb3481daf08f0fc4e5949fa1af Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 23 Apr 2015 16:56:22 -0700 Subject: gpio: omap: Allow building as a loadable module We currently get all kinds of errors building the omap gpio driver as a module starting with: undefined reference to `omap2_gpio_resume_after_idle' undefined reference to `omap2_gpio_prepare_for_idle' ... Let's fix the issue by adding inline functions to the header. Note that we can now also remove the two unused functions for omap_set_gpio_debounce and omap_set_gpio_debounce_time. Then doing rmmod on the module produces further warnings because of missing exit related functions. Let's add those. And finally, we can make the Kconfig entry just a tristate option that's selected for omaps. Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Nishanth Menon Signed-off-by: Tony Lindgren Reviewed-by: Grygorii Strashko Acked-by: Santosh Shilimkar Reviewed-by: Felipe Balbi Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-omap.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 5d50b25a73d7..cb2618147c34 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -208,9 +208,17 @@ struct omap_gpio_platform_data { int (*get_context_loss_count)(struct device *dev); }; +#if IS_BUILTIN(CONFIG_GPIO_OMAP) extern void omap2_gpio_prepare_for_idle(int off_mode); extern void omap2_gpio_resume_after_idle(void); -extern void omap_set_gpio_debounce(int gpio, int enable); -extern void omap_set_gpio_debounce_time(int gpio, int enable); +#else +static inline void omap2_gpio_prepare_for_idle(int off_mode) +{ +} + +static inline void omap2_gpio_resume_after_idle(void) +{ +} +#endif #endif -- cgit v1.2.3 From 66eb3bd857f5311f72c7c371f78ddc9c472befba Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 27 Apr 2015 18:04:05 +0200 Subject: pinctrl: use ERR_CAST instead of ERR_PTR/PTR_ERR Inspired by scripts/coccinelle/api/err_cast.cocci Signed-off-by: Fabian Frederick Signed-off-by: Linus Walleij --- include/linux/pinctrl/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 18eccefea06e..d7e5d608faa7 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -142,7 +142,7 @@ static inline struct pinctrl * __must_check pinctrl_get_select( s = pinctrl_lookup_state(p, name); if (IS_ERR(s)) { pinctrl_put(p); - return ERR_PTR(PTR_ERR(s)); + return ERR_CAST(s); } ret = pinctrl_select_state(p, s); -- cgit v1.2.3 From 1d6b98774cff82860a3f044610e956bcbff556c1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 31 Mar 2015 15:55:57 +0200 Subject: tty: constify return type of tty_name All users of tty_name pass the result directly to a printf-like function. This means we can actually let tty_name return the literal "NULL tty" or tty->name directly, avoiding the strcpy and a lot of medium-sized stack buffers. In preparation for that, make the return type const char*. While at it, we can also constify the tty parameter. Signed-off-by: Rasmus Villemoes Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index fe5623c9af71..4cbecfc7b3c9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,7 +421,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine); -extern char *tty_name(struct tty_struct *tty, char *buf); +extern const char *tty_name(const struct tty_struct *tty, char *buf); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); -- cgit v1.2.3 From 429b474990cb4e5e8cfe2352daf649d0599cccb6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 31 Mar 2015 15:55:59 +0200 Subject: tty: remove buf parameter from tty_name() tty_name no longer uses the buf parameter, so remove it along with all the 64 byte stack buffers that used to be passed in. Mostly generated by the coccinelle script @depends on patch@ identifier buf; constant C; expression tty; @@ - char buf[C]; <+... - tty_name(tty, buf) + tty_name(tty) ...+> allmodconfig compiles, so I'm fairly confident the stack buffers weren't used for other purposes as well. Signed-off-by: Rasmus Villemoes Reviewed-by: Peter Hurley Acked-by: Jesper Nilsson Acked-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 4cbecfc7b3c9..9a72c9144d8a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,7 +421,7 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine); -extern const char *tty_name(const struct tty_struct *tty, char *buf); +extern const char *tty_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); -- cgit v1.2.3 From 6b3cddccf4eec0883feb065aea28dd9770bb17d0 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 11 Apr 2015 11:02:36 -0400 Subject: serial: core: Fix unused variable warnings from uart_console() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_SERIAL_CORE_CONSOLE=n, build warnings are generated by uart_console() macro expansion: drivers/tty/serial/of_serial.c: In function ‘of_serial_suspend_8250’: drivers/tty/serial/of_serial.c:262:20: warning: unused variable ‘port’ [-Wunused-variable] struct uart_port *port = &port8250->port; ^ drivers/tty/serial/of_serial.c: In function ‘of_serial_resume_8250’: drivers/tty/serial/of_serial.c:272:20: warning: unused variable ‘port’ [-Wunused-variable] struct uart_port *port = &port8250->port; Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 025dad9dcde4..297d4fa1cfe5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -35,7 +35,7 @@ #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else -#define uart_console(port) (0) +#define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; -- cgit v1.2.3 From 17799359e7b3fa6ef4f2bf926cd6821cf7903ecf Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 28 Feb 2015 02:13:11 -0800 Subject: mtd: nand_bbt: make nand_scan_bbt() static This implementation detail is no longer needed outside of nand_bbt.c. Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3d4ea7eb2b68..6c51876941f3 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -833,7 +833,6 @@ struct nand_manufacturers { extern struct nand_flash_dev nand_flash_ids[]; extern struct nand_manufacturers nand_manuf_ids[]; -extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd); extern int nand_default_bbt(struct mtd_info *mtd); extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); extern int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From ac01ce1410fc2c7b5f3af5e9c972e6a412eee54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Wed, 29 Apr 2015 16:18:46 +0100 Subject: tracing: Make ftrace_print_array_seq compute buf_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only caller to this function (__print_array) was getting it wrong by passing the array length instead of buffer length. As the element size was already being passed for other reasons it seems reasonable to push the calculation of buffer length into the function. Link: http://lkml.kernel.org/r/1430320727-14582-1-git-send-email-alex.bennee@linaro.org Signed-off-by: Alex Bennée Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46e83c2156c6..f9ecf63d47f1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -46,7 +46,7 @@ const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); const char *ftrace_print_array_seq(struct trace_seq *p, - const void *buf, int buf_len, + const void *buf, int count, size_t el_size); struct trace_iterator; -- cgit v1.2.3 From 0097d12e504b3ce57b68810737ad6a5a64a98c68 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Apr 2015 13:43:30 +0200 Subject: KVM: provide irq_unsafe kvm_guest_{enter|exit} Several kvm architectures disable interrupts before kvm_guest_enter. kvm_guest_enter then uses local_irq_save/restore to disable interrupts again or for the first time. Lets provide underscore versions of kvm_guest_{enter|exit} that assume being called locked. kvm_guest_enter now disables interrupts for the full function and thus we can remove the check for preemptible. This patch then adopts s390/kvm to use local_irq_disable/enable calls which are slighty cheaper that local_irq_save/restore and call these new functions. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054309a0..efc16df1fc5d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -762,16 +762,10 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, } #endif -static inline void kvm_guest_enter(void) +/* must be called with irqs disabled */ +static inline void __kvm_guest_enter(void) { - unsigned long flags; - - BUG_ON(preemptible()); - - local_irq_save(flags); guest_enter(); - local_irq_restore(flags); - /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspace from rcu point of view. In @@ -783,12 +777,27 @@ static inline void kvm_guest_enter(void) rcu_virt_note_context_switch(smp_processor_id()); } +/* must be called with irqs disabled */ +static inline void __kvm_guest_exit(void) +{ + guest_exit(); +} + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + local_irq_save(flags); + __kvm_guest_enter(); + local_irq_restore(flags); +} + static inline void kvm_guest_exit(void) { unsigned long flags; local_irq_save(flags); - guest_exit(); + __kvm_guest_exit(); local_irq_restore(flags); } -- cgit v1.2.3 From 653f52c316a49c5ee2701bc13b15879f20790662 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 23 Apr 2015 11:52:37 -0400 Subject: kvm,x86: load guest FPU context more eagerly Currently KVM will clear the FPU bits in CR0.TS in the VMCS, and trap to re-load them every time the guest accesses the FPU after a switch back into the guest from the host. This patch copies the x86 task switch semantics for FPU loading, with the FPU loaded eagerly after first use if the system uses eager fpu mode, or if the guest uses the FPU frequently. In the latter case, after loading the FPU for 255 times, the fpu_counter will roll over, and we will revert to loading the FPU on demand, until it has been established that the guest is still actively using the FPU. This mirrors the x86 task switch policy, which seems to work. Signed-off-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index efc16df1fc5d..b7a08cd6f4a8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -230,6 +230,7 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; + unsigned char fpu_counter; wait_queue_head_t wq; struct pid *pid; int sigset_active; -- cgit v1.2.3 From aed5ed47724f6a7453fa62e3c90f3cee93edbfe3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 May 2015 18:04:23 +0200 Subject: context_tracking: Protect against recursion Context tracking recursion can happen when an exception triggers in the middle of a call to a context tracking probe. This special case can be caused by vmalloc faults. If an access to a memory area allocated by vmalloc happens in the middle of context_tracking_enter(), we may run into an endless fault loop because the exception in turn calls context_tracking_enter() which faults on the same vmalloc'ed memory, triggering an exception again, etc... Some rare crashes have been reported so lets protect against this with a recursion counter. Reported-by: Dave Jones Signed-off-by: Frederic Weisbecker Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Chris Metcalf Cc: H. Peter Anvin Cc: Martin Schwidefsky Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Rafael J . Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430928266-24888-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/context_tracking_state.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 6b7b96a32b75..678ecdf90cf6 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -12,6 +12,7 @@ struct context_tracking { * may be further optimized using static keys. */ bool active; + int recursion; enum ctx_state { CONTEXT_KERNEL = 0, CONTEXT_USER, -- cgit v1.2.3 From fafe870f31212a72f3c2d74e7b90e4ef39e83ee1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 6 May 2015 18:04:24 +0200 Subject: context_tracking: Inherit TIF_NOHZ through forks instead of context switches TIF_NOHZ is used by context_tracking to force syscall slow-path on every task in order to track userspace roundtrips. As such, it must be set on all running tasks. It's currently explicitly inherited through context switches. There is no need to do it in this fast-path though. The flag could simply be set once for all on all tasks, whether they are running or not. Lets do this by setting the flag for the init task on early boot, and let it propagate through fork inheritance. While at it, mark context_tracking_cpu_set() as init code, we only need it at early boot time. Suggested-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Reviewed-by: Rik van Riel Cc: Borislav Petkov Cc: Chris Metcalf Cc: Dave Jones Cc: H. Peter Anvin Cc: Martin Schwidefsky Cc: Mike Galbraith Cc: Paul E . McKenney Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430928266-24888-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 10 ---------- include/linux/sched.h | 3 +++ 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 2821838256b4..b96bd299966f 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -14,8 +14,6 @@ extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); extern void context_tracking_user_exit(void); -extern void __context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next); static inline void user_enter(void) { @@ -51,19 +49,11 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } -static inline void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next) -{ - if (context_tracking_is_enabled()) - __context_tracking_task_switch(prev, next); -} #else static inline void user_enter(void) { } static inline void user_exit(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } -static inline void context_tracking_task_switch(struct task_struct *prev, - struct task_struct *next) { } #endif /* !CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..185a750e4ed4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2532,6 +2532,9 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif +#define tasklist_empty() \ + list_empty(&init_task.tasks) + #define next_task(p) \ list_entry_rcu((p)->tasks.next, struct task_struct, tasks) -- cgit v1.2.3 From 83dedea8a07fb4bf91863764b15c1c4ec00330f9 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 6 May 2015 18:04:25 +0200 Subject: nohz: Add tick_nohz_full_add_cpus_to() API This API is useful to modify a cpumask indicating some special nohz-type functionality so that the nohz cores are automatically added to that set. Signed-off-by: Chris Metcalf Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Dave Jones Cc: H. Peter Anvin Cc: Martin Schwidefsky Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Rafael J. Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429024675-18938-1-git-send-email-cmetcalf@ezchip.com Link: http://lkml.kernel.org/r/1430928266-24888-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f8492da57ad3..4191b5623a28 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -134,6 +134,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } +static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) +{ + if (tick_nohz_full_enabled()) + cpumask_or(mask, mask, tick_nohz_full_mask); +} + extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); @@ -142,6 +148,7 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } +static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void __tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } -- cgit v1.2.3 From c6201cd8513db2db54b248a862672849ed9ccb82 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 7 May 2015 09:52:22 -0500 Subject: PCI/MSI: Remove unused pci_msi_off() pci_msi_off() is unused, so remove it. Removes the exported symbol pci_msi_off(). Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..50b7c7d0206f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -974,7 +974,6 @@ void pci_intx(struct pci_dev *dev, int enable); bool pci_intx_mask_supported(struct pci_dev *dev); bool pci_check_and_mask_intx(struct pci_dev *dev); bool pci_check_and_unmask_intx(struct pci_dev *dev); -void pci_msi_off(struct pci_dev *dev); int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size); int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask); int pci_wait_for_pending(struct pci_dev *dev, int pos, u16 mask); -- cgit v1.2.3 From 385f83f85cd9428db82cae5e6f6f786be113b24c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 28 Apr 2015 12:21:40 +0200 Subject: dmaengine: Remove Renesas Audio DMAC peri peri platform data Commit 3cd44dcd35a6 ("dmaengine: remove Renesas Audio DMAC peri peri") forgot to remove the header file with the platform data definitions. Signed-off-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-rcar-audmapp.h | 34 -------------------------- 1 file changed, 34 deletions(-) delete mode 100644 include/linux/platform_data/dma-rcar-audmapp.h (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h deleted file mode 100644 index 471fffebbeb4..000000000000 --- a/include/linux/platform_data/dma-rcar-audmapp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * This is for Renesas R-Car Audio-DMAC-peri-peri. - * - * Copyright (C) 2014 Renesas Electronics Corporation - * Copyright (C) 2014 Kuninori Morimoto - * - * This file is based on the include/linux/sh_dma.h - * - * Header for the new SH dmaengine driver - * - * Copyright (C) 2010 Guennadi Liakhovetski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef SH_AUDMAPP_H -#define SH_AUDMAPP_H - -#include - -struct audmapp_slave_config { - int slave_id; - dma_addr_t src; - dma_addr_t dst; - u32 chcr; -}; - -struct audmapp_pdata { - struct audmapp_slave_config *slave; - int slave_num; -}; - -#endif /* SH_AUDMAPP_H */ -- cgit v1.2.3 From 0782e63bc6fe7e2d3408d250df11d388b7799c6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 May 2015 19:49:49 +0200 Subject: sched: Handle priority boosted tasks proper in setscheduler() Ronny reported that the following scenario is not handled correctly: T1 (prio = 10) lock(rtmutex); T2 (prio = 20) lock(rtmutex) boost T1 T1 (prio = 20) sys_set_scheduler(prio = 30) T1 prio = 30 .... sys_set_scheduler(prio = 10) T1 prio = 30 The last step is wrong as T1 should now be back at prio 20. Commit c365c292d059 ("sched: Consider pi boosting in setscheduler()") only handles the case where a boosted tasks tries to lower its priority. Fix it by taking the new effective priority into account for the decision whether a change of the priority is required. Reported-by: Ronny Meeus Tested-by: Steven Rostedt Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt Cc: Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Mike Galbraith Fixes: c365c292d059 ("sched: Consider pi boosting in setscheduler()") Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1505051806060.4225@nanos Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 6341f5be6e24..a30b172df6e1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,7 +18,7 @@ static inline int rt_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_check_prio(struct task_struct *task, int newprio); +extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) @@ -31,9 +31,10 @@ static inline int rt_mutex_getprio(struct task_struct *p) return p->normal_prio; } -static inline int rt_mutex_check_prio(struct task_struct *task, int newprio) +static inline int rt_mutex_get_effective_prio(struct task_struct *task, + int newprio) { - return 0; + return newprio; } static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) -- cgit v1.2.3 From 3289bdb429884c0279bf9ab72dff7b934f19dfc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Apr 2015 13:19:42 +0200 Subject: sched: Move the loadavg code to a more obvious location I could not find the loadavg code.. turns out it was hidden in a file called proc.c. It further got mingled up with the cruft per rq load indexes (which we really want to get rid of). Move the per rq load indexes into the fair.c load-balance code (that's the only thing that uses them) and rename proc.c to loadavg.c so we can find it again. Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Paul Gortmaker Cc: Thomas Gleixner [ Did minor cleanups to the code. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..85cf253bc366 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -173,7 +173,12 @@ extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void update_cpu_load_nohz(void); +#else +static inline void update_cpu_load_nohz(void) { } +#endif extern unsigned long get_parent_ip(unsigned long addr); -- cgit v1.2.3 From b76808e6808e34e7e78131d2b8cb0535622b8e9f Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 30 Apr 2015 21:19:57 -0700 Subject: signals, sched: Change all uses of JOBCTL_* from 'int' to 'long' c56fb6564dcd ("Fix a misaligned load inside ptrace_attach()") makes jobctl an "unsigned long". It makes sense to have the masks applied to it match that type. This is currently just a cosmetic change, but it will prevent the mask from being unexpectedly truncated if we ever end up with masks with more bits. One instance of "signr" is an int, but I left this alone because the mask ensures that it will never overflow. Signed-off-by: Palmer Dabbelt Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chris Metcalf Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: bobby.prani@gmail.com Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: richard@nod.at Cc: vdavydov@parallels.com Link: http://lkml.kernel.org/r/1430453997-32459-4-git-send-email-palmer@dabbelt.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 85cf253bc366..4f066cb625ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2082,22 +2082,22 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ -#define JOBCTL_STOP_DEQUEUED (1 << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1 << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1 << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1 << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1 << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1 << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1 << JOBCTL_LISTENING_BIT) +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned int mask); + unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned int mask); + unsigned long mask); static inline void rcu_copy_process(struct task_struct *p) { -- cgit v1.2.3 From 7e60598785f30cf3dc9e476cc0fc3feeb37a0c63 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 30 Apr 2015 21:19:56 -0700 Subject: sched/wait: Change wait_on_bit*() to take an unsigned long *, not a void * The implementations of wait_on_bit*() will only work with long-aligned memory on systems that don't support misaligned loads and stores. This patch changes the function prototypes to ensure that the compiler will enforce alignment. Running make defconfig make KFLAGS="-Werror" seems to indicate that, as of c56fb6564dcd ("Fix a misaligned load inside ptrace_attach()"), there are now no users of non-long-aligned calls to wait_on_bit*(). I additionally tried a few "make randconfig" attempts, none of which failed to compile for this reason. Signed-off-by: Palmer Dabbelt Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chris Metcalf Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: bobby.prani@gmail.com Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: richard@nod.at Cc: vdavydov@parallels.com Link: http://lkml.kernel.org/r/1430453997-32459-3-git-send-email-palmer@dabbelt.com Signed-off-by: Ingo Molnar --- include/linux/wait.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 2db83349865b..d69ac4ecc88b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -969,7 +969,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *); * on that signal. */ static inline int -wait_on_bit(void *word, int bit, unsigned mode) +wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -994,7 +994,7 @@ wait_on_bit(void *word, int bit, unsigned mode) * on that signal. */ static inline int -wait_on_bit_io(void *word, int bit, unsigned mode) +wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -1020,7 +1020,8 @@ wait_on_bit_io(void *word, int bit, unsigned mode) * received a signal and the mode permitted wakeup on that signal. */ static inline int -wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) +wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, + unsigned long timeout) { might_sleep(); if (!test_bit(bit, word)) @@ -1047,7 +1048,8 @@ wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout) * on that signal. */ static inline int -wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) +wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) { might_sleep(); if (!test_bit(bit, word)) @@ -1075,7 +1077,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock(void *word, int bit, unsigned mode) +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) @@ -1099,7 +1101,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode) * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock_io(void *word, int bit, unsigned mode) +wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) @@ -1125,7 +1127,8 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode) * the @mode allows that signal to wake the process. */ static inline int -wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) +wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) { might_sleep(); if (!test_and_set_bit(bit, word)) -- cgit v1.2.3 From e7cc4173115347bcdaa5de2824dd46ef2c58425f Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 30 Apr 2015 21:19:55 -0700 Subject: signals, ptrace, sched: Fix a misaligned load inside ptrace_attach() The misaligned load exception arises when running ptrace_attach() on the RISC-V (which hasn't been upstreamed yet). The problem is that wait_on_bit() takes a void* but then proceeds to call test_bit(), which takes a long*. This allows an int-aligned pointer to be passed to test_bit(), which promptly fails. This will manifest on any other asm-generic port where unaligned loads trap, where sizeof(long) > sizeof(int), and where task_struct.jobctl ends up not being long-aligned. This patch changes task_struct.jobctl to be a long, which ensures it has the correct alignment. Signed-off-by: Palmer Dabbelt Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chris Metcalf Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: bobby.prani@gmail.com Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: richard@nod.at Cc: vdavydov@parallels.com Link: http://lkml.kernel.org/r/1430453997-32459-2-git-send-email-palmer@dabbelt.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f066cb625ad..fb650a2f4a73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1374,7 +1374,7 @@ struct task_struct { int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ - unsigned int jobctl; /* JOBCTL_*, siglock protected */ + unsigned long jobctl; /* JOBCTL_*, siglock protected */ /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; -- cgit v1.2.3 From 316c1608d15c736439d4065ed12f306db554b3da Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Apr 2015 13:00:20 -0700 Subject: sched, timer: Convert usages of ACCESS_ONCE() in the scheduler to READ_ONCE()/WRITE_ONCE() ACCESS_ONCE doesn't work reliably on non-scalar types. This patch removes the rest of the existing usages of ACCESS_ONCE() in the scheduler, and use the new READ_ONCE() and WRITE_ONCE() APIs as appropriate. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Rik van Riel Acked-by: Waiman Long Cc: Andrew Morton Cc: Aswin Chandramouleeswaran Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Preeti U Murthy Cc: Scott J Norton Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1430251224-5764-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index fb650a2f4a73..d70910355b20 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3085,13 +3085,13 @@ static inline void mm_update_next_owner(struct mm_struct *mm) static inline unsigned long task_rlimit(const struct task_struct *tsk, unsigned int limit) { - return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); } static inline unsigned long task_rlimit_max(const struct task_struct *tsk, unsigned int limit) { - return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); } static inline unsigned long rlimit(unsigned int limit) -- cgit v1.2.3 From 1018016c706f7ff9f56fde3a649789c47085a293 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Apr 2015 13:00:22 -0700 Subject: sched, timer: Replace spinlocks with atomics in thread_group_cputimer(), to improve scalability While running a database workload, we found a scalability issue with itimers. Much of the problem was caused by the thread_group_cputimer spinlock. Each time we account for group system/user time, we need to obtain a thread_group_cputimer's spinlock to update the timers. On larger systems (such as a 16 socket machine), this caused more than 30% of total time spent trying to obtain this kernel lock to update these group timer stats. This patch converts the timers to 64-bit atomic variables and use atomic add to update them without a lock. With this patch, the percent of total time spent updating thread group cputimer timers was reduced from 30% down to less than 1%. Note: On 32-bit systems using the generic 64-bit atomics, this causes sample_group_cputimer() to take locks 3 times instead of just 1 time. However, we tested this patch on a 32-bit system ARM system using the generic atomics and did not find the overhead to be much of an issue. An explanation for why this isn't an issue is that 32-bit systems usually have small numbers of CPUs, and cacheline contention from extra spinlocks called periodically is not really apparent on smaller systems. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Andrew Morton Cc: Aswin Chandramouleeswaran Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Scott J Norton Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/1430251224-5764-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 7 ++++--- include/linux/sched.h | 10 +++------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 696d22312b31..7b9d8b59e7bf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -50,9 +50,10 @@ extern struct fs_struct init_fs; .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ .cputimer = { \ - .cputime = INIT_CPUTIME, \ - .running = 0, \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + .running = 0 \ }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d70910355b20..a45874c3fab6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -598,9 +598,10 @@ struct task_cputime { * used for thread group CPU timer calculations. */ struct thread_group_cputimer { - struct task_cputime cputime; + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; int running; - raw_spinlock_t lock; }; #include @@ -2967,11 +2968,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -static inline void thread_group_cputime_init(struct signal_struct *sig) -{ - raw_spin_lock_init(&sig->cputimer.lock); -} - /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. -- cgit v1.2.3 From 971e8a985482c76487edb5a49811e99b96e846e1 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Apr 2015 13:00:23 -0700 Subject: sched, timer: Provide an atomic 'struct task_cputime' data structure This patch adds an atomic variant of the 'struct task_cputime' data structure, which can be used to store and update task_cputime statistics without needing to do locking. Suggested-by: Ingo Molnar Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Andrew Morton Cc: Aswin Chandramouleeswaran Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Scott J Norton Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/1430251224-5764-5-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a45874c3fab6..6eb78cd45da7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,6 +572,23 @@ struct task_cputime { .sum_exec_runtime = 0, \ } +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } + #ifdef CONFIG_PREEMPT_COUNT #define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) #else -- cgit v1.2.3 From 7110744516276e906f9197e2857d026eb2343393 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Apr 2015 13:00:24 -0700 Subject: sched, timer: Use the atomic task_cputime in thread_group_cputimer Recent optimizations were made to thread_group_cputimer to improve its scalability by keeping track of cputime stats without a lock. However, the values were open coded to the structure, causing them to be at a different abstraction level from the regular task_cputime structure. Furthermore, any subsequent similar optimizations would not be able to share the new code, since they are specific to thread_group_cputimer. This patch adds the new task_cputime_atomic data structure (introduced in the previous patch in the series) to thread_group_cputimer for keeping track of the cputime atomically, which also helps generalize the code. Suggested-by: Ingo Molnar Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Andrew Morton Cc: Aswin Chandramouleeswaran Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Scott J Norton Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/1430251224-5764-6-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 6 ++---- include/linux/sched.h | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7b9d8b59e7bf..bb9b075f0eb0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -50,10 +50,8 @@ extern struct fs_struct init_fs; .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ .cputimer = { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - .running = 0 \ + .cputime_atomic = INIT_CPUTIME_ATOMIC, \ + .running = 0, \ }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6eb78cd45da7..4adc536a3b03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -615,9 +615,7 @@ struct task_cputime_atomic { * used for thread group CPU timer calculations. */ struct thread_group_cputimer { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; + struct task_cputime_atomic cputime_atomic; int running; }; -- cgit v1.2.3 From 7675104990ed255b9315a82ae827ff312a2a88a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 May 2015 08:27:50 -0700 Subject: sched: Implement lockless wake-queues This is useful for locking primitives that can effect multiple wakeups per operation and want to avoid lock internal lock contention by delaying the wakeups until we've released the lock internal locks. Alternatively it can be used to avoid issuing multiple wakeups, and thus save a few cycles, in packet processing. Queue all target tasks and wakeup once you've processed all packets. That way you avoid waking the target task multiple times if there were multiple packets for the same task. Properties of a wake_q are: - Lockless, as queue head must reside on the stack. - Being a queue, maintains wakeup order passed by the callers. This can be important for otherwise, in scenarios where highly contended locks could affect any reliance on lock fairness. - A queued task cannot be added again until it is woken up. This patch adds the needed infrastructure into the scheduler code and uses the new wake_list to delay the futex wakeups until after we've released the hash bucket locks. Signed-off-by: Peter Zijlstra (Intel) [tweaks, adjustments, comments, etc.] Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Borislav Petkov Cc: Chris Mason Cc: Davidlohr Bueso Cc: George Spelvin Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Manfred Spraul Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1430494072-30283-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4adc536a3b03..254d88e80f65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -920,6 +920,50 @@ enum cpu_idle_type { #define SCHED_CAPACITY_SHIFT 10 #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function, where the fact that the queue is + * not used again will be easy to see by inspection. + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. + */ +struct wake_q_node { + struct wake_q_node *next; +}; + +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + /* * sched-domains (multiprocessor balancing) declarations: */ @@ -1532,6 +1576,8 @@ struct task_struct { /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; + struct wake_q_node wake_q; + #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; -- cgit v1.2.3 From ff303e66c240ba6269e31817a386995440a18c99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Apr 2015 20:05:30 +0200 Subject: perf: Fix software migrate events Stephane asked about PERF_COUNT_SW_CPU_MIGRATIONS and I realized it was borken: > The problem is that the task isn't actually scheduled while its being > migrated (obviously), and if its not scheduled, the counters aren't > scheduled either, so there's no observing of the fact. > > A further problem with migrations is that many migrations happen from > softirq context, which is nested inside the 'random' task context of > whoemever happens to run at that time, similarly for the wakeup > migrations triggered from (soft)irq context. All those end up being > accounted in the task that's currently running, eg. your 'ls'. The below cures this by marking a task as migrated and accounting it on the subsequent sched_in(). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 ++++++++++++++++++++++++ include/linux/sched.h | 7 ++++--- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..e86f85abeda7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -798,11 +798,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) extern struct static_key_deferred perf_sched_events; +static __always_inline bool +perf_sw_migrate_enabled(void) +{ + if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS])) + return true; + return false; +} + +static inline void perf_event_task_migrate(struct task_struct *task) +{ + if (perf_sw_migrate_enabled()) + task->sched_migrated = 1; +} + static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); + + if (perf_sw_migrate_enabled() && task->sched_migrated) { + struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); + + perf_fetch_caller_regs(regs); + ___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0); + task->sched_migrated = 0; + } } static inline void perf_event_task_sched_out(struct task_struct *prev, @@ -925,6 +947,8 @@ perf_aux_output_skip(struct perf_output_handle *handle, static inline void * perf_get_aux(struct perf_output_handle *handle) { return NULL; } static inline void +perf_event_task_migrate(struct task_struct *task) { } +static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } static inline void diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..2c5e6c3db654 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1356,9 +1356,6 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; -#ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; -#endif /* per-thread vma caching */ u32 vmacache_seqnum; struct vm_area_struct *vmacache[VMACACHE_SIZE]; @@ -1381,10 +1378,14 @@ struct task_struct { /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; #ifdef CONFIG_MEMCG_KMEM unsigned memcg_kmem_skip_account:1; #endif +#ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; +#endif unsigned long atomic_flags; /* Flags needing atomic access. */ -- cgit v1.2.3 From 59aabfc7e959f5f213e4e5cc7567ab4934da2adf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 30 Apr 2015 17:12:16 -0400 Subject: locking/rwsem: Reduce spinlock contention in wakeup after up_read()/up_write() In up_write()/up_read(), rwsem_wake() will be called whenever it detects that some writers/readers are waiting. The rwsem_wake() function will take the wait_lock and call __rwsem_do_wake() to do the real wakeup. For a heavily contended rwsem, doing a spin_lock() on wait_lock will cause further contention on the heavily contended rwsem cacheline resulting in delay in the completion of the up_read/up_write operations. This patch makes the wait_lock taking and the call to __rwsem_do_wake() optional if at least one spinning writer is present. The spinning writer will be able to take the rwsem and call rwsem_wake() later when it calls up_write(). With the presence of a spinning writer, rwsem_wake() will now try to acquire the lock using trylock. If that fails, it will just quit. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Acked-by: Jason Low Cc: Andrew Morton Cc: Borislav Petkov Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430428337-16802-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/osq_lock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 3a6490e81b28..703ea5c30a33 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock) extern bool osq_lock(struct optimistic_spin_queue *lock); extern void osq_unlock(struct optimistic_spin_queue *lock); +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + #endif -- cgit v1.2.3 From 663fdcbee0a656cdaef934e7f50e6c2670373bc9 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Thu, 30 Apr 2015 17:27:21 +0530 Subject: kernel: Replace reference to ASSIGN_ONCE() with WRITE_ONCE() in comment Looks like commit : 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)") left behind a reference to ASSIGN_ONCE(). Update this to WRITE_ONCE(). Signed-off-by: Preeti U Murthy Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Thomas Gleixner Cc: borntraeger@de.ibm.com Cc: dave@stgolabs.net Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20150430115721.22278.94082.stgit@preeti.in.ibm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..a7c0941d10da 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -450,7 +450,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. * - * If possible use READ_ONCE/ASSIGN_ONCE instead. + * If possible use READ_ONCE()/WRITE_ONCE() instead. */ #define __ACCESS_ONCE(x) ({ \ __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ -- cgit v1.2.3 From 0cd3be6e9a46f84ef7a42e1a5645d32ad547b12e Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Mon, 4 May 2015 23:04:16 +0200 Subject: clk: si5351: Do not pass struct clk in platform_data When registering clk-si5351 by platform_data, we should not pass struct clk for the reference clocks. Drop struct clk from platform_data and rework the driver to use devm_clk_get of named clock references. While at it, check for at least one valid input clock and properly prepare/ enable valid reference clocks. Signed-off-by: Sebastian Hesselbarth Reported-by: Michael Welling Reported-by: Jean-Francois Moine Reported-by: Russell King Tested-by: Michael Welling Tested-by: Jean-Francois Moine Signed-off-by: Michael Turquette --- include/linux/platform_data/si5351.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/si5351.h b/include/linux/platform_data/si5351.h index a947ab8b441a..533d9807e543 100644 --- a/include/linux/platform_data/si5351.h +++ b/include/linux/platform_data/si5351.h @@ -5,8 +5,6 @@ #ifndef __LINUX_PLATFORM_DATA_SI5351_H__ #define __LINUX_PLATFORM_DATA_SI5351_H__ -struct clk; - /** * enum si5351_pll_src - Si5351 pll clock source * @SI5351_PLL_SRC_DEFAULT: default, do not change eeprom config @@ -107,8 +105,6 @@ struct si5351_clkout_config { * @clkout: array of clkout configuration */ struct si5351_platform_data { - struct clk *clk_xtal; - struct clk *clk_clkin; enum si5351_pll_src pll_src[2]; struct si5351_clkout_config clkout[8]; }; -- cgit v1.2.3 From 56f13c0d9524c5816f5dc9c91b9d766d6b1064ca Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Apr 2015 12:35:47 +0300 Subject: dmaengine: of_dma: Support for DMA routers DMA routers are transparent devices used to mux DMA requests from peripherals to DMA controllers. They are used when the SoC integrates more devices with DMA requests then their controller can handle. DRA7x is one example of such SoC, where the sDMA can hanlde 128 DMA request lines, but in SoC level it has 205 DMA requests. The of_dma_router will be registered as of_dma_controller with special xlate function and additional parameters. The driver for the router is responsible to craft the dma_spec (in the of_dma_route_allocate callback) which can be used to requests a DMA channel from the real DMA controller. This way the router can be transparent for the system while remaining generic enough to be used in different environments. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 17 +++++++++++++++++ include/linux/of_dma.h | 21 +++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..abf63ceabef9 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -221,6 +221,16 @@ struct dma_chan_percpu { unsigned long bytes_transferred; }; +/** + * struct dma_router - DMA router structure + * @dev: pointer to the DMA router device + * @route_free: function to be called when the route can be disconnected + */ +struct dma_router { + struct device *dev; + void (*route_free)(struct device *dev, void *route_data); +}; + /** * struct dma_chan - devices supply DMA channels, clients use them * @device: ptr to the dma device who supplies this channel, always !%NULL @@ -232,6 +242,8 @@ struct dma_chan_percpu { * @local: per-cpu pointer to a struct dma_chan_percpu * @client_count: how many clients are using this channel * @table_count: number of appearances in the mem-to-mem allocation table + * @router: pointer to the DMA router structure + * @route_data: channel specific data for the router * @private: private data for certain client-channel associations */ struct dma_chan { @@ -247,6 +259,11 @@ struct dma_chan { struct dma_chan_percpu __percpu *local; int client_count; int table_count; + + /* DMA router */ + struct dma_router *router; + void *route_data; + void *private; }; diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 56bc026c143f..98ba7525929e 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -23,6 +23,9 @@ struct of_dma { struct device_node *of_node; struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *); + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *); + struct dma_router *dma_router; void *of_dma_data; }; @@ -37,12 +40,20 @@ extern int of_dma_controller_register(struct device_node *np, (struct of_phandle_args *, struct of_dma *), void *data); extern void of_dma_controller_free(struct device_node *np); + +extern int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router); +#define of_dma_router_free of_dma_controller_free + extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name); extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma); extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, struct of_dma *ofdma); + #else static inline int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) @@ -56,6 +67,16 @@ static inline void of_dma_controller_free(struct device_node *np) { } +static inline int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + return -ENODEV; +} + +#define of_dma_router_free of_dma_controller_free + static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { -- cgit v1.2.3 From 4ae92bc77ac8e620f7c8d59b5882a4cb0d1c4ef1 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:27 +0200 Subject: net: filter: add a callback to allow classic post-verifier transformations This is in preparation for use by the seccomp code, the rationale is not to duplicate additional code within the seccomp layer, but instead, have it abstracted and hidden within the classic BPF API. As an interim step, this now also makes bpf_prepare_filter() visible (not as exported symbol though), so that seccomp can reuse that code path instead of reimplementing it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be..91996247cb55 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -384,7 +384,13 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); +struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit v1.2.3 From d9e12f42e58da475379b9080708b94f2095904af Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:28 +0200 Subject: seccomp: simplify seccomp_prepare_filter and reuse bpf_prepare_filter Remove the calls to bpf_check_classic(), bpf_convert_filter() and bpf_migrate_runtime() and let bpf_prepare_filter() take care of that instead. seccomp_check_filter() is passed to bpf_prepare_filter() so that it gets called from there, after bpf_check_classic(). We can now remove exposure of two internal classic BPF functions previously used by seccomp. The export of bpf_check_classic() symbol, previously known as sk_chk_filter(), was there since pre git times, and no in-tree module was using it, therefore remove it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 91996247cb55..0dcb44bcfc5f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -363,9 +363,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); - struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); @@ -387,7 +384,6 @@ int sk_detach_filter(struct sock *sk); typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, unsigned int flen); -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, bpf_aux_classic_check_t trans); -- cgit v1.2.3 From ac67eb2c5347bd9976308c0e0cf1d9e7ca690342 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 6 May 2015 16:12:30 +0200 Subject: seccomp, filter: add and use bpf_prog_create_from_user from seccomp Seccomp has always been a special candidate when it comes to preparation of its filters in seccomp_prepare_filter(). Due to the extra checks and filter rewrite it partially duplicates code and has BPF internals exposed. This patch adds a generic API inside the BPF code code that seccomp can use and thus keep it's filter preparation code minimal and better maintainable. The other side-effect is that now classic JITs can add seccomp support as well by only providing a BPF_LDX | BPF_W | BPF_ABS translation. Tested with seccomp and BPF test suites. Signed-off-by: Daniel Borkmann Cc: Nicolas Schichan Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0dcb44bcfc5f..3c03a6085b82 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -374,19 +374,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp) __bpf_prog_free(fp); } +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); - -typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, - unsigned int flen); - -struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, - bpf_aux_classic_check_t trans); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit v1.2.3 From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:53 +0200 Subject: netlink: allow to listen "all" netns More accurately, listen all netns that have a nsid assigned into the netns where the netlink socket is opened. For this purpose, a netlink socket option is added: NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this socket will receive netlink notifications from all netns that have a nsid assigned into the netns where the socket has been opened. The nsid is sent to userland via an anscillary data. With this patch, a daemon needs only one socket to listen many netns. This is useful when the number of netns is high. Because 0 is a valid value for a nsid, the field nsid_is_set indicates if the field nsid is valid or not. skb->cb is initialized to 0 on skb allocation, thus we are sure that we will never send a nsid 0 by error to the userland. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) -- cgit v1.2.3 From 920ce39f6c204d4ce4d8acebe7522f0dfa95f662 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 8 May 2015 14:31:50 -0700 Subject: sched, timer: Fix documentation for 'struct thread_group_cputimer' Fix the docbook build bug reported by Fengguang Wu. Reported-by: Fengguang Wu Signed-off-by: Jason Low Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hp.com Cc: aswin@hp.com Cc: bp@alien8.de Cc: dave@stgolabs.net Cc: fweisbec@gmail.com Cc: mgorman@suse.de Cc: oleg@redhat.com Cc: paulmck@linux.vnet.ibm.com Cc: preeti@linux.vnet.ibm.com Cc: riel@redhat.com Cc: rostedt@goodmis.org Cc: scott.norton@hp.com Cc: torvalds@linux-foundation.org Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/1431120710.5136.12.camel@j-VirtualBox Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 254d88e80f65..0eceeec5a01a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -606,10 +606,9 @@ struct task_cputime_atomic { /** * struct thread_group_cputimer - thread group interval timer counts - * @cputime: thread group interval timers. + * @cputime_atomic: atomic thread group interval timers. * @running: non-zero when there are timers running and * @cputime receives updates. - * @lock: lock for fields in this struct. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. -- cgit v1.2.3 From 34ef33f7da6b00900d3a896d33522a035a930245 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 28 Apr 2015 14:04:07 +0200 Subject: usb: phy: Remove the phy-rcar-gen2-usb driver The phy-rcar-gen2-usb driver, which supports legacy platform data only, is no longer used since commit a483dcbfa21f919c ("ARM: shmobile: lager: Remove legacy board support"). This driver was superseded by the DT-only phy-rcar-gen2 driver, which was introduced in commit 1233f59f745b237d ("phy: Renesas R-Car Gen2 PHY driver"). Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/usb-rcar-gen2-phy.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/platform_data/usb-rcar-gen2-phy.h (limited to 'include/linux') diff --git a/include/linux/platform_data/usb-rcar-gen2-phy.h b/include/linux/platform_data/usb-rcar-gen2-phy.h deleted file mode 100644 index dd3ba46c0d90..000000000000 --- a/include/linux/platform_data/usb-rcar-gen2-phy.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2013 Renesas Solutions Corp. - * Copyright (C) 2013 Cogent Embedded, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __USB_RCAR_GEN2_PHY_H -#define __USB_RCAR_GEN2_PHY_H - -#include - -struct rcar_gen2_phy_platform_data { - /* USB channel 0 configuration */ - bool chan0_pci:1; /* true: PCI USB host 0, false: USBHS */ - /* USB channel 2 configuration */ - bool chan2_pci:1; /* true: PCI USB host 2, false: USBSS */ -}; - -#endif -- cgit v1.2.3 From 1c5841e832e2d7563c31de4946118e78baf573a3 Mon Sep 17 00:00:00 2001 From: Eddie Huang Date: Tue, 28 Apr 2015 21:40:32 +0800 Subject: tty: serial: 8250: export early_serial8250_setup function 8250-like uart driver may call early_serial8250_setup to reuse 8250_early.c character output function. Signed-off-by: Eddie Huang Tested-by: Sascha Hauer Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 78097e7a330a..f0c68d88b6f4 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -137,6 +137,8 @@ extern int early_serial_setup(struct uart_port *port); extern unsigned int serial8250_early_in(struct uart_port *port, int offset); extern void serial8250_early_out(struct uart_port *port, int offset, int value); +extern int early_serial8250_setup(struct earlycon_device *device, + const char *options); extern void serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); extern int serial8250_do_startup(struct uart_port *port); -- cgit v1.2.3 From c27ffc1080179c3f3b85e1e194fa61f1c9923b62 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Apr 2015 18:21:25 +0200 Subject: serial: sh-sci: Move private definitions to private header file Move private register definitions and enums from the public header file to the driver private "sh-sci.h" header file. The common Serial Control Register definitions are left in the public header file, as they're needed to fill in plat_sci_port.scscr on legacy systems not using DT. Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 67 +--------------------------------------------- 1 file changed, 1 insertion(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 6c5e3bb282b0..395fceb8c060 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -10,13 +10,6 @@ #define SCIx_NOT_SUPPORTED (-1) -/* SCSMR (Serial Mode Register) */ -#define SCSMR_CHR (1 << 6) /* 7-bit Character Length */ -#define SCSMR_PE (1 << 5) /* Parity Enable */ -#define SCSMR_ODD (1 << 4) /* Odd Parity */ -#define SCSMR_STOP (1 << 3) /* Stop Bit Length */ -#define SCSMR_CKS 0x0003 /* Clock Select */ - /* Serial Control Register (@ = not supported by all parts) */ #define SCSCR_TIE (1 << 7) /* Transmit Interrupt Enable */ #define SCSCR_RIE (1 << 6) /* Receive Interrupt Enable */ @@ -26,43 +19,7 @@ #define SCSCR_TOIE (1 << 2) /* Timeout Interrupt Enable @ */ #define SCSCR_CKE1 (1 << 1) /* Clock Enable 1 */ #define SCSCR_CKE0 (1 << 0) /* Clock Enable 0 */ -/* SCIFA/SCIFB only */ -#define SCSCR_TDRQE (1 << 15) /* Tx Data Transfer Request Enable */ -#define SCSCR_RDRQE (1 << 14) /* Rx Data Transfer Request Enable */ - -/* SCxSR (Serial Status Register) on SCI */ -#define SCI_TDRE 0x80 /* Transmit Data Register Empty */ -#define SCI_RDRF 0x40 /* Receive Data Register Full */ -#define SCI_ORER 0x20 /* Overrun Error */ -#define SCI_FER 0x10 /* Framing Error */ -#define SCI_PER 0x08 /* Parity Error */ -#define SCI_TEND 0x04 /* Transmit End */ - -#define SCI_DEFAULT_ERROR_MASK (SCI_PER | SCI_FER) - -/* SCxSR (Serial Status Register) on SCIF, HSCIF */ -#define SCIF_ER 0x0080 /* Receive Error */ -#define SCIF_TEND 0x0040 /* Transmission End */ -#define SCIF_TDFE 0x0020 /* Transmit FIFO Data Empty */ -#define SCIF_BRK 0x0010 /* Break Detect */ -#define SCIF_FER 0x0008 /* Framing Error */ -#define SCIF_PER 0x0004 /* Parity Error */ -#define SCIF_RDF 0x0002 /* Receive FIFO Data Full */ -#define SCIF_DR 0x0001 /* Receive Data Ready */ - -#define SCIF_DEFAULT_ERROR_MASK (SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK) - -/* SCFCR (FIFO Control Register) */ -#define SCFCR_LOOP (1 << 0) /* Loopback Test */ - -/* SCSPTR (Serial Port Register), optional */ -#define SCSPTR_RTSIO (1 << 7) /* Serial Port RTS Pin Input/Output */ -#define SCSPTR_CTSIO (1 << 5) /* Serial Port CTS Pin Input/Output */ -#define SCSPTR_SPB2IO (1 << 1) /* Serial Port Break Input/Output */ -#define SCSPTR_SPB2DT (1 << 0) /* Serial Port Break Data */ - -/* HSSRR HSCIF */ -#define HSCIF_SRE 0x8000 /* Sampling Rate Register Enable */ + enum { SCIx_PROBE_REGTYPE, @@ -82,28 +39,6 @@ enum { SCIx_NR_REGTYPES, }; -/* - * SCI register subset common for all port types. - * Not all registers will exist on all parts. - */ -enum { - SCSMR, /* Serial Mode Register */ - SCBRR, /* Bit Rate Register */ - SCSCR, /* Serial Control Register */ - SCxSR, /* Serial Status Register */ - SCFCR, /* FIFO Control Register */ - SCFDR, /* FIFO Data Count Register */ - SCxTDR, /* Transmit (FIFO) Data Register */ - SCxRDR, /* Receive (FIFO) Data Register */ - SCLSR, /* Line Status Register */ - SCTFDR, /* Transmit FIFO Data Count Register */ - SCRFDR, /* Receive FIFO Data Count Register */ - SCSPTR, /* Serial Port Register */ - HSSRR, /* Sampling Rate Register */ - - SCIx_NR_REGS, -}; - struct device; struct plat_sci_port_ops { -- cgit v1.2.3 From d94a0a3857987c76c37a8095977fe554799ab69d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Apr 2015 18:21:29 +0200 Subject: serial: sh-sci: Standardize on using the BIT() macro to define register bits Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 395fceb8c060..7c536ac5be05 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -1,6 +1,7 @@ #ifndef __LINUX_SERIAL_SCI_H #define __LINUX_SERIAL_SCI_H +#include #include #include @@ -11,14 +12,14 @@ #define SCIx_NOT_SUPPORTED (-1) /* Serial Control Register (@ = not supported by all parts) */ -#define SCSCR_TIE (1 << 7) /* Transmit Interrupt Enable */ -#define SCSCR_RIE (1 << 6) /* Receive Interrupt Enable */ -#define SCSCR_TE (1 << 5) /* Transmit Enable */ -#define SCSCR_RE (1 << 4) /* Receive Enable */ -#define SCSCR_REIE (1 << 3) /* Receive Error Interrupt Enable @ */ -#define SCSCR_TOIE (1 << 2) /* Timeout Interrupt Enable @ */ -#define SCSCR_CKE1 (1 << 1) /* Clock Enable 1 */ -#define SCSCR_CKE0 (1 << 0) /* Clock Enable 0 */ +#define SCSCR_TIE BIT(7) /* Transmit Interrupt Enable */ +#define SCSCR_RIE BIT(6) /* Receive Interrupt Enable */ +#define SCSCR_TE BIT(5) /* Transmit Enable */ +#define SCSCR_RE BIT(4) /* Receive Enable */ +#define SCSCR_REIE BIT(3) /* Receive Error Interrupt Enable @ */ +#define SCSCR_TOIE BIT(2) /* Timeout Interrupt Enable @ */ +#define SCSCR_CKE1 BIT(1) /* Clock Enable 1 */ +#define SCSCR_CKE0 BIT(0) /* Clock Enable 0 */ enum { @@ -48,7 +49,7 @@ struct plat_sci_port_ops { /* * Port-specific capabilities */ -#define SCIx_HAVE_RTSCTS (1 << 0) +#define SCIx_HAVE_RTSCTS BIT(0) /* * Platform device specific platform_data struct -- cgit v1.2.3 From bd63364caa8df38bad2b25b11b2a1b849475cce5 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Thu, 26 Mar 2015 13:54:39 +0000 Subject: vt: add cursor blink interval escape sequence Add an escape sequence to specify the current console's cursor blink interval. The interval is specified as a number of milliseconds until the next cursor display state toggle, from 50 to 65535. /proc/loadavg did not show a difference with a one msec interval, but the lower bound is set to 50 msecs since slower hardware wasn't tested. Store the interval in the vc_data structure for later access by fbcon, initializing the value to fbcon's current hardcoded value of 200 msecs. Signed-off-by: Scot Doyle Acked-by: Pavel Machek Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index e859c98d1767..e329ee2667e1 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -104,6 +104,7 @@ struct vc_data { unsigned int vc_resize_user; /* resize request from user */ unsigned int vc_bell_pitch; /* Console bell pitch */ unsigned int vc_bell_duration; /* Console bell duration */ + unsigned short vc_cur_blink_ms; /* Cursor blink duration */ struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ struct uni_pagedir *vc_uni_pagedir; struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */ -- cgit v1.2.3 From 1a48632ffed61352a7810ce089dc5a8bcd505a60 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 13 Apr 2015 13:24:34 -0400 Subject: pty: Fix input race when closing A read() from a pty master may mistakenly indicate EOF (errno == -EIO) after the pty slave has closed, even though input data remains to be read. For example, pty slave | input worker | pty master | | | | n_tty_read() pty_write() | | input avail? no add data | | sleep schedule worker --->| | . |---> flush_to_ldisc() | . pty_close() | fill read buffer | . wait for worker | wakeup reader --->| . | read buffer full? |---> input avail ? yes |<--- yes - exit worker | copy 4096 bytes to user TTY_OTHER_CLOSED <---| |<--- kick worker | | **** New read() before worker starts **** | | n_tty_read() | | input avail? no | | TTY_OTHER_CLOSED? yes | | return -EIO Several conditions are required to trigger this race: 1. the ldisc read buffer must become full so the input worker exits 2. the read() count parameter must be >= 4096 so the ldisc read buffer is empty 3. the subsequent read() occurs before the kicked worker has processed more input However, the underlying cause of the race is that data is pipelined, while tty state is not; ie., data already written by the pty slave end is not yet visible to the pty master end, but state changes by the pty slave end are visible to the pty master end immediately. Pipeline the TTY_OTHER_CLOSED state through input worker to the reader. 1. Introduce TTY_OTHER_DONE which is set by the input worker when TTY_OTHER_CLOSED is set and either the input buffers are flushed or input processing has completed. Readers/polls are woken when TTY_OTHER_DONE is set. 2. Reader/poll checks TTY_OTHER_DONE instead of TTY_OTHER_CLOSED. 3. A new input worker is started from pty_close() after setting TTY_OTHER_CLOSED, which ensures the TTY_OTHER_DONE state will be set if the last input worker is already finished (or just about to exit). Remove tty_flush_to_ldisc(); no in-tree callers. Fixes: 52bce7f8d4fc ("pty, n_tty: Simplify input processing on final close") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96311 BugLink: http://bugs.launchpad.net/bugs/1429756 Cc: # 3.19+ Reported-by: Andy Whitcroft Reported-by: H.J. Lu Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index fe5623c9af71..d76631f615c2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -339,6 +339,7 @@ struct tty_file_private { #define TTY_EXCLUSIVE 3 /* Exclusive open mode */ #define TTY_DEBUG 4 /* Debugging */ #define TTY_DO_WRITE_WAKEUP 5 /* Call write_wakeup after queuing new */ +#define TTY_OTHER_DONE 6 /* Closed pty has completed input processing */ #define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ @@ -462,7 +463,6 @@ extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void no_tty(void); -extern void tty_flush_to_ldisc(struct tty_struct *tty); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); -- cgit v1.2.3 From faaa44955dedc661f083636d816af90975a359ee Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Wed, 29 Apr 2015 21:16:39 +0300 Subject: iio: core: Introduce IIO_CHAN_INFO_OVERSAMPLING_RATIO Some magnetometers can perform a number of repetitions in HW for each measurement to increase accuracy. One example is Bosch BMC150: http://ae-bst.resource.bosch.com/media/products/dokumente/bmc150/BST-BMC150-DS000-04.pdf. Introduce an interface to set the oversampling ratio for these devices. Signed-off-by: Irina Tirdea Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index b1e46ae89aa7..058441da4984 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -44,6 +44,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_DEBOUNCE_COUNT, IIO_CHAN_INFO_DEBOUNCE_TIME, IIO_CHAN_INFO_CALIBEMISSIVITY, + IIO_CHAN_INFO_OVERSAMPLING_RATIO, }; enum iio_shared_by { -- cgit v1.2.3 From 61ba64fc0768879a300599b011c176203bdf27d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 May 2015 09:54:06 -0400 Subject: libfs: simple_follow_link() let "fast" symlinks store the pointer to the body into ->i_link and use simple_follow_link for ->follow_link() Reviewed-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..0ac758fcff00 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -656,6 +656,7 @@ struct inode { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; + char *i_link; }; __u32 i_generation; @@ -2721,6 +2722,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); +void *simple_follow_link(struct dentry *, struct nameidata *); +extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -- cgit v1.2.3 From 5723cb01f0295ace2b029b0737dd6525a2de337f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 May 2015 10:27:18 -0400 Subject: debugfs: switch to simple_follow_link() Reviewed-by: Jan Kara Signed-off-by: Al Viro --- include/linux/debugfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index cb25af461054..420311bcee38 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,7 +45,6 @@ extern struct dentry *arch_debugfs_dir; /* declared over in file.c */ extern const struct file_operations debugfs_file_operations; -extern const struct inode_operations debugfs_link_operations; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, -- cgit v1.2.3 From 37882db0546c759ff75b561c188539ac96fd0bfe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 23 Mar 2015 13:37:39 +1100 Subject: SECURITY: remove nameidata arg from inode_follow_link. No ->inode_follow_link() methods use the nameidata arg, and it is about to become private to namei.c. So remove from all inode_follow_link() functions. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/security.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 18264ea9e314..62a66202ecf1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -43,7 +43,6 @@ struct file; struct vfsmount; struct path; struct qstr; -struct nameidata; struct iattr; struct fown_struct; struct file_operations; @@ -477,7 +476,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode_follow_link: * Check permission to follow a symbolic link when looking up a pathname. * @dentry contains the dentry structure for the link. - * @nd contains the nameidata structure for the parent directory. * Return 0 if permission is granted. * @inode_permission: * Check permission before accessing an inode. This hook is called by the @@ -1553,7 +1551,7 @@ struct security_operations { int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); + int (*inode_follow_link) (struct dentry *dentry); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (const struct path *path); @@ -1839,7 +1837,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int security_inode_readlink(struct dentry *dentry); -int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); +int security_inode_follow_link(struct dentry *dentry); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); @@ -2241,8 +2239,7 @@ static inline int security_inode_readlink(struct dentry *dentry) return 0; } -static inline int security_inode_follow_link(struct dentry *dentry, - struct nameidata *nd) +static inline int security_inode_follow_link(struct dentry *dentry) { return 0; } -- cgit v1.2.3 From 680baacbca69d18a6d7315374ad83d05ac9c0977 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 May 2015 13:32:22 -0400 Subject: new ->follow_link() and ->put_link() calling conventions a) instead of storing the symlink body (via nd_set_link()) and returning an opaque pointer later passed to ->put_link(), ->follow_link() _stores_ that opaque pointer (into void * passed by address by caller) and returns the symlink body. Returning ERR_PTR() on error, NULL on jump (procfs magic symlinks) and pointer to symlink body for normal symlinks. Stored pointer is ignored in all cases except the last one. Storing NULL for opaque pointer (or not storing it at all) means no call of ->put_link(). b) the body used to be passed to ->put_link() implicitly (via nameidata). Now only the opaque pointer is. In the cases when we used the symlink body to free stuff, ->follow_link() now should store it as opaque pointer in addition to returning it. Signed-off-by: Al Viro --- include/linux/fs.h | 12 ++++++------ include/linux/namei.h | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ac758fcff00..9ab934113a28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1608,12 +1608,12 @@ struct file_operations { struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); - void * (*follow_link) (struct dentry *, struct nameidata *); + const char * (*follow_link) (struct dentry *, void **, struct nameidata *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); - void (*put_link) (struct dentry *, struct nameidata *, void *); + void (*put_link) (struct dentry *, void *); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); @@ -2705,13 +2705,13 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); -extern void *page_follow_link_light(struct dentry *, struct nameidata *); -extern void page_put_link(struct dentry *, struct nameidata *, void *); +extern const char *page_follow_link_light(struct dentry *, void **, struct nameidata *); +extern void page_put_link(struct dentry *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; -extern void kfree_put_link(struct dentry *, struct nameidata *, void *); +extern void kfree_put_link(struct dentry *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); @@ -2722,7 +2722,7 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); -void *simple_follow_link(struct dentry *, struct nameidata *); +const char *simple_follow_link(struct dentry *, void **, struct nameidata *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); diff --git a/include/linux/namei.h b/include/linux/namei.h index c8990779f0c3..a5d5bed2c0e1 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -71,8 +71,6 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); extern void nd_jump_link(struct nameidata *nd, struct path *path); -extern void nd_set_link(struct nameidata *nd, char *path); -extern char *nd_get_link(struct nameidata *nd); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { -- cgit v1.2.3 From 894bc8c4662ba9daceafe943a5ba0dd407da5cd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 May 2015 07:16:16 -0400 Subject: namei: remove restrictions on nesting depth The only restriction is that on the total amount of symlinks crossed; how they are nested does not matter Signed-off-by: Al Viro --- include/linux/namei.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index a5d5bed2c0e1..3a6cc9651712 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -11,6 +11,8 @@ struct nameidata; enum { MAX_NESTED_LINKS = 8 }; +#define MAXSYMLINKS 40 + /* * Type of the last component on LOOKUP_PARENT */ -- cgit v1.2.3 From 756daf263ea53a8bfc89db26cb92e963953253a1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 23 Mar 2015 13:37:38 +1100 Subject: VFS: replace {, total_}link_count in task_struct with pointer to nameidata task_struct currently contains two ad-hoc members for use by the VFS: link_count and total_link_count. These are only interesting to fs/namei.c, so exposing them explicitly is poor layering. Incidentally, link_count isn't used anymore, so it can just die. This patches replaces those with a single pointer to 'struct nameidata'. This structure represents the current filename lookup of which there can only be one per process, and is a natural place to store total_link_count. This will allow the current "nameidata" argument to all follow_link operations to be removed as current->nameidata can be used instead in the _very_ few instances that care about it at all. As there are occasional circumstances where pathname lookup can recurse, such as through kern_path_locked, we always save and old current->nameidata (if there is one) when setting a new value, and make sure any active link_counts are preserved. follow_mount and follow_automount now get a 'struct nameidata *' rather than 'int flags' so that they can directly access total_link_count, rather than going through 'current'. Suggested-by: Al Viro Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..f6c9b69d66f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1461,7 +1461,7 @@ struct task_struct { it with task_lock()) - initialized normally by setup_new_exec */ /* file system info */ - int link_count, total_link_count; + struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; -- cgit v1.2.3 From 6e77137b363b8d866ac29c5a0c95e953614fb2d8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 May 2015 13:37:52 -0400 Subject: don't pass nameidata to ->follow_link() its only use is getting passed to nd_jump_link(), which can obtain it from current->nameidata Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++--- include/linux/namei.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ab934113a28..ed7c9f298759 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1608,7 +1608,7 @@ struct file_operations { struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); - const char * (*follow_link) (struct dentry *, void **, struct nameidata *); + const char * (*follow_link) (struct dentry *, void **); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); @@ -2705,7 +2705,7 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); -extern const char *page_follow_link_light(struct dentry *, void **, struct nameidata *); +extern const char *page_follow_link_light(struct dentry *, void **); extern void page_put_link(struct dentry *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); @@ -2722,7 +2722,7 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); -const char *simple_follow_link(struct dentry *, void **, struct nameidata *); +const char *simple_follow_link(struct dentry *, void **); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); diff --git a/include/linux/namei.h b/include/linux/namei.h index 3a6cc9651712..d756304aa09b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -72,7 +72,7 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); -extern void nd_jump_link(struct nameidata *nd, struct path *path); +extern void nd_jump_link(struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { -- cgit v1.2.3 From 2da572c959dd5815aef153cf62010b16a498a0d3 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:14 -0400 Subject: lib: add software 842 compression/decompression Add 842-format software compression and decompression functions. Update the MAINTAINERS 842 section to include the new files. The 842 compression function can compress any input data into the 842 compression format. The 842 decompression function can decompress any standard-format 842 compressed data - specifically, either a compressed data buffer created by the 842 software compression function, or a compressed data buffer created by the 842 hardware compressor (located in PowerPC coprocessors). The 842 compressed data format is explained in the header comments. This is used in a later patch to provide a full software 842 compression and decompression crypto interface. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- include/linux/sw842.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/sw842.h (limited to 'include/linux') diff --git a/include/linux/sw842.h b/include/linux/sw842.h new file mode 100644 index 000000000000..109ba041c2ae --- /dev/null +++ b/include/linux/sw842.h @@ -0,0 +1,12 @@ +#ifndef __SW842_H__ +#define __SW842_H__ + +#define SW842_MEM_COMPRESS (0xf000) + +int sw842_compress(const u8 *src, unsigned int srclen, + u8 *dst, unsigned int *destlen, void *wmem); + +int sw842_decompress(const u8 *src, unsigned int srclen, + u8 *dst, unsigned int *destlen); + +#endif -- cgit v1.2.3 From 7011a122383e36dab594406720fa1d089e0be8f9 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:17 -0400 Subject: crypto: nx - add NX-842 platform frontend driver Add NX-842 frontend that allows using either the pSeries platform or PowerNV platform driver (to be added by later patch) for the NX-842 hardware. Update the MAINTAINERS file to include the new filenames. Update Kconfig files to clarify titles and descriptions, and correct dependencies. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- include/linux/nx842.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nx842.h b/include/linux/nx842.h index a4d324c6406a..d919c22b7fd6 100644 --- a/include/linux/nx842.h +++ b/include/linux/nx842.h @@ -1,11 +1,13 @@ #ifndef __NX842_H__ #define __NX842_H__ -int nx842_get_workmem_size(void); -int nx842_get_workmem_size_aligned(void); +#define __NX842_PSERIES_MEM_COMPRESS ((PAGE_SIZE * 2) + 10240) + +#define NX842_MEM_COMPRESS __NX842_PSERIES_MEM_COMPRESS + int nx842_compress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); + unsigned char *out, unsigned int *out_len, void *wrkmem); int nx842_decompress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); + unsigned char *out, unsigned int *out_len, void *wrkmem); #endif -- cgit v1.2.3 From 959e6659b6f74ec1fa4d391a3b88d63dc0189f36 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:18 -0400 Subject: crypto: nx - add nx842 constraints Add "constraints" for the NX-842 driver. The constraints are used to indicate what the current NX-842 platform driver is capable of. The constraints tell the NX-842 user what alignment, min and max length, and length multiple each provided buffers should conform to. These are required because the 842 hardware requires buffers to meet specific constraints that vary based on platform - for example, the pSeries max length is much lower than the PowerNV max length. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- include/linux/nx842.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nx842.h b/include/linux/nx842.h index d919c22b7fd6..aa1a97e90dea 100644 --- a/include/linux/nx842.h +++ b/include/linux/nx842.h @@ -5,6 +5,15 @@ #define NX842_MEM_COMPRESS __NX842_PSERIES_MEM_COMPRESS +struct nx842_constraints { + int alignment; + int multiple; + int minimum; + int maximum; +}; + +int nx842_constraints(struct nx842_constraints *constraints); + int nx842_compress(const unsigned char *in, unsigned int in_len, unsigned char *out, unsigned int *out_len, void *wrkmem); int nx842_decompress(const unsigned char *in, unsigned int in_len, -- cgit v1.2.3 From 99182a42b7ef3d5e4180992ce01befd9e87526d2 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:19 -0400 Subject: crypto: nx - add PowerNV platform NX-842 driver Add driver for NX-842 hardware on the PowerNV platform. This allows the use of the 842 compression hardware coprocessor on the PowerNV platform. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- include/linux/nx842.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nx842.h b/include/linux/nx842.h index aa1a97e90dea..4ddf68d9c0d4 100644 --- a/include/linux/nx842.h +++ b/include/linux/nx842.h @@ -1,9 +1,11 @@ #ifndef __NX842_H__ #define __NX842_H__ -#define __NX842_PSERIES_MEM_COMPRESS ((PAGE_SIZE * 2) + 10240) +#define __NX842_PSERIES_MEM_COMPRESS (10240) +#define __NX842_POWERNV_MEM_COMPRESS (1024) -#define NX842_MEM_COMPRESS __NX842_PSERIES_MEM_COMPRESS +#define NX842_MEM_COMPRESS (max_t(unsigned int, \ + __NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS)) struct nx842_constraints { int alignment; -- cgit v1.2.3 From b19e7f51a55fe740c18038d1d6957aedfc078d07 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Wed, 29 Apr 2015 18:34:59 +0300 Subject: gpio: gpio-generic: add flag to read out output value from reg_set The change introduces BGPIOF_READ_OUTPUT_REG_SET flag for gpio-generic GPIO chip implementation, which allows to get correct configured value from reg_set register, input value is still get from reg_dat. Signed-off-by: Vladimir Zapolskiy Signed-off-by: Linus Walleij --- include/linux/basic_mmio_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 0e97856b2cff..14eea946e640 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -74,5 +74,6 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev, #define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ #define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) +#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3 From c884fbd452147e952ae160e750553d00ea4dc4c9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 6 May 2015 13:29:06 +0300 Subject: gpio / ACPI: Add support for retrieving GpioInt resources from a device ACPI specification knows two types of GPIOs: GpioIo and GpioInt. The latter is used to describe that a given device interrupt line is connected to a specific GPIO pin. Typical ACPI _CRS entry for such device looks like below: Name (_CRS, ResourceTemplate () { I2cSerialBus (0x004A, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.PCI0.I2C6", 0x00, ResourceConsumer) GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, IoRestrictionOutputOnly, "\\_SB.GPO0", 0x00, ResourceConsumer) { 0x004B } GpioInt (Level, ActiveLow, Shared, PullDefault, 0x0000, "\\_SB.GPO0", 0x00, ResourceConsumer) { 0x004C } }) Currently drivers need to request a GPIO corresponding to the right GpioInt and then translate that to Linux IRQ number. This adds unnecessary lines of boiler-plate code. We can ease this a bit by introducing acpi_dev_gpio_irq_get() analogous to of_irq_get(). This function translates given GpioInt resource under the device in question to the suitable Linux IRQ number. Signed-off-by: Mika Westerberg Acked-by: Rafael J. Wysocki Signed-off-by: Linus Walleij --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..f57c440642cd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -721,6 +721,8 @@ static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) if (adev) adev->driver_gpios = NULL; } + +int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); #else static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, const struct acpi_gpio_mapping *gpios) @@ -728,6 +730,11 @@ static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, return -ENXIO; } static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {} + +static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +{ + return -ENXIO; +} #endif /* Device properties */ -- cgit v1.2.3 From bda0be7ad994812960e9f8f2d2757f72cb4a96cb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 23 Mar 2015 13:37:39 +1100 Subject: security: make inode_follow_link RCU-walk aware inode_follow_link now takes an inode and rcu flag as well as the dentry. inode is used in preference to d_backing_inode(dentry), particularly in RCU-walk mode. selinux_inode_follow_link() gets dentry_has_perm() and inode_has_perm() open-coded into it so that it can call avc_has_perm_flags() in way that is safe if LOOKUP_RCU is set. Calling avc_has_perm_flags() with rcu_read_lock() held means that when avc_has_perm_noaudit calls avc_compute_av(), the attempt to rcu_read_unlock() before calling security_compute_av() will not actually drop the RCU read-lock. However as security_compute_av() is completely in a read_lock()ed region, it should be safe with the RCU read-lock held. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/security.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 62a66202ecf1..52febde52479 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -476,6 +476,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode_follow_link: * Check permission to follow a symbolic link when looking up a pathname. * @dentry contains the dentry structure for the link. + * @inode contains the inode, which itself is not stable in RCU-walk + * @rcu indicates whether we are in RCU-walk mode. * Return 0 if permission is granted. * @inode_permission: * Check permission before accessing an inode. This hook is called by the @@ -1551,7 +1553,8 @@ struct security_operations { int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry); + int (*inode_follow_link) (struct dentry *dentry, struct inode *inode, + bool rcu); int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (const struct path *path); @@ -1837,7 +1840,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int security_inode_readlink(struct dentry *dentry); -int security_inode_follow_link(struct dentry *dentry); +int security_inode_follow_link(struct dentry *dentry, struct inode *inode, + bool rcu); int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(const struct path *path); @@ -2239,7 +2243,9 @@ static inline int security_inode_readlink(struct dentry *dentry) return 0; } -static inline int security_inode_follow_link(struct dentry *dentry) +static inline int security_inode_follow_link(struct dentry *dentry, + struct inode *inode, + bool rcu) { return 0; } -- cgit v1.2.3 From 5f2c4179e129bdc47870a81a65d0aff85aa18293 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 May 2015 11:14:26 -0400 Subject: switch ->put_link() from dentry to inode only one instance looks at that argument at all; that sole exception wants inode rather than dentry. Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed7c9f298759..f21e3328f991 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1613,7 +1613,7 @@ struct inode_operations { struct posix_acl * (*get_acl)(struct inode *, int); int (*readlink) (struct dentry *, char __user *,int); - void (*put_link) (struct dentry *, void *); + void (*put_link) (struct inode *, void *); int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); @@ -2706,12 +2706,12 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern const char *page_follow_link_light(struct dentry *, void **); -extern void page_put_link(struct dentry *, void *); +extern void page_put_link(struct inode *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; -extern void kfree_put_link(struct dentry *, void *); +extern void kfree_put_link(struct inode *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); -- cgit v1.2.3 From ecc087ff14352aed52b8e775b4511e7f9cfc64ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 May 2015 11:19:14 -0400 Subject: new helper: free_page_put_link() similar to kfree_put_link() Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f21e3328f991..8f738512c874 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2712,6 +2712,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len, extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_put_link(struct inode *, void *); +extern void free_page_put_link(struct inode *, void *); extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); int vfs_getattr_nosec(struct path *path, struct kstat *stat); -- cgit v1.2.3 From 140e807da12988e2a925fe029336e7bb67a8d4de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:07:08 -0500 Subject: tun: Utilize the normal socket network namespace refcounting. There is no need for tun to do the weird network namespace refcounting. The existing network namespace refcounting in tfile has almost exactly the same lifetime. So rewrite the code to use the struct sock network namespace refcounting and remove the unnecessary hand rolled network namespace refcounting and the unncesary tfile->net. This change allows the tun code to directly call sock_put bypassing sock_release and making SOCK_EXTERNALLY_ALLOCATED unnecessary. Remove the now unncessary tun_release so that if anything tries to use the sock_release code path the kernel will oops, and let us know about the bug. The macvtap code already uses it's internal socket this way. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/net.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 738ea48be889..8a5e81d2bdf7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -38,7 +38,6 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define SOCK_EXTERNALLY_ALLOCATED 5 #ifndef ARCH_HAS_SOCKET_TYPES /** -- cgit v1.2.3 From eeb1bd5c40edb0e2fd925c8535e2fdebdbc5cef2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:08:05 -0500 Subject: net: Add a struct net parameter to sock_create_kern This is long overdue, and is part of cleaning up how we allocate kernel sockets that don't reference count struct net. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 8a5e81d2bdf7..04aa06852771 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -207,7 +207,7 @@ void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); -- cgit v1.2.3 From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:09:13 -0500 Subject: net: Pass kern from net_proto_family.create to sk_alloc In preparation for changing how struct net is refcounted on kernel sockets pass the knowledge that we are creating a kernel socket from sock_create_kern through to sk_alloc. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 66a7d7600f43..b49cf923becc 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct net *net, struct socket *sock); + int (*create)(struct net *net, struct socket *sock, int kern); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; -- cgit v1.2.3 From d2788d34885d4ce5ba17a8996fd95d28942e574e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 May 2015 22:51:32 +0200 Subject: net: sched: further simplify handle_ing Ingress qdisc has no other purpose than calling into tc_classify() that executes attached classifier(s) and action(s). It has a 1:1 relationship to dev->ingress_queue. After having commit 087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed the central ingress lock, one major contention point is gone. The extra indirection layers however, are not necessary for calling into ingress qdisc. pktgen calling locally into netif_receive_skb() with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps. We can redirect the private classifier list to the netdev directly, without changing any classifier API bits (!) and execute on that from handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed, ingress qdisc doesn't have a queue and thus dev_deactivate_queue() is also not applicable, ingress_cl_list provides similar behaviour. In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc. One next possible step is the removal of the dev's ingress (dummy) netdev_queue, and to only have the list member in the netdevice itself. Note, the filter chain is RCU protected and individual filter elements are being kfree'd by sched subsystem after RCU grace period. RCU read lock is being held by __netif_receive_skb_core(). Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1899c74a7127..c4e1caf6056f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,11 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; +#if CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *ingress_cl_list; +#endif struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; -- cgit v1.2.3 From 6be109b31ccdb9c98e7be12687171f6602527a5d Mon Sep 17 00:00:00 2001 From: Arun Ramamurthy Date: Wed, 22 Apr 2015 16:04:11 -0700 Subject: phy: core: Add devm_of_phy_get_by_index to phy-core Some generic drivers, such as ehci, may use multiple phys and for such drivers referencing phy(s) by name(s) does not make sense. Instead of inventing new naming schemes and using custom code to iterate through them, such drivers are better of using nameless phy bindings and using this newly introduced API to iterate through them. Signed-off-by: Arun Ramamurthy Reviewed-by: Ray Jui Reviewed-by: Scott Branden [kishon@ti.com: fix compilation errors] Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index a0197fa1b116..8cf05e341cff 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -133,6 +133,8 @@ struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, const char *con_id); +struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, + int index); void phy_put(struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); @@ -261,6 +263,13 @@ static inline struct phy *devm_of_phy_get(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline struct phy *devm_of_phy_get_by_index(struct device *dev, + struct device_node *np, + int index) +{ + return ERR_PTR(-ENOSYS); +} + static inline void phy_put(struct phy *phy) { } -- cgit v1.2.3 From 4cda01e86f68dacc758b2daf6e8809f2ce915b85 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 May 2015 19:28:49 +0200 Subject: net: sched: fix typo in net_device ifdef This should have been #ifdef not #if. Reported-by: Fengguang Wu Fixes: d2788d34885d ("net: sched: further simplify handle_ing") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c4e1caf6056f..a6d706b2a947 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,7 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; -#if CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; -- cgit v1.2.3 From 5844feeaa4154d1c46d3462c7a4653d22356d8b4 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 23 Jan 2015 00:22:27 -0800 Subject: mtd: nand: add common DT init code These are already-documented common bindings for NAND chips. Let's handle them in nand_base. If NAND controller drivers need to act on this data before bringing up the NAND chip (e.g., fill out ECC callback functions, change HW modes, etc.), then they can do so between calling nand_scan_ident() and nand_scan_tail(). Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 6c51876941f3..f25e2bdd188c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -26,6 +26,8 @@ struct mtd_info; struct nand_flash_dev; +struct device_node; + /* Scan and identify a NAND device */ extern int nand_scan(struct mtd_info *mtd, int max_chips); /* @@ -542,6 +544,7 @@ struct nand_buffers { * flash device * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the * flash device. + * @dn: [BOARDSPECIFIC] device node describing this instance * @read_byte: [REPLACEABLE] read one byte from the chip * @read_word: [REPLACEABLE] read one word from the chip * @write_byte: [REPLACEABLE] write a single byte to the chip on the @@ -644,6 +647,8 @@ struct nand_chip { void __iomem *IO_ADDR_R; void __iomem *IO_ADDR_W; + struct device_node *dn; + uint8_t (*read_byte)(struct mtd_info *mtd); u16 (*read_word)(struct mtd_info *mtd); void (*write_byte)(struct mtd_info *mtd, uint8_t byte); -- cgit v1.2.3 From 9d0be7f4810257a9b0fc78fff641f14409f14ab3 Mon Sep 17 00:00:00 2001 From: Eduardo Valentin Date: Mon, 11 May 2015 19:34:23 -0700 Subject: thermal: support slope and offset coefficients It is common to have a linear extrapolation from the current sensor readings and the actual temperature value. This is specially the case when the sensor is in use to extrapolate hotspots. This patch adds slope and offset constants for single sensor linear extrapolation equation. Because the same sensor can be use in different locations, from board to board, these constants are added as part of thermal_zone_params. The constants are available through sysfs. It is up to the device driver to determine the usage of these values. Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6bbe11c97cea..037e9df2f610 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -302,6 +302,17 @@ struct thermal_zone_params { /* threshold below which the error is no longer accumulated */ s32 integral_cutoff; + + /* + * @slope: slope of a linear temperature adjustment curve. + * Used by thermal zone drivers. + */ + int slope; + /* + * @offset: offset of a linear temperature adjustment curve. + * Used by thermal zone drivers (default 0). + */ + int offset; }; struct thermal_genl_event { -- cgit v1.2.3 From 05ae797566a66d159cf1e2ee11bf3f6fae40c8eb Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Mon, 4 May 2015 10:36:35 -0700 Subject: mailbox: Make mbox_chan_ops const The mailbox controller's channel ops ought to be read-only. Update all the mailbox drivers to make their mbox_chan_ops const as well. Signed-off-by: Andrew Bresticker Cc: Ashwin Chaugule Cc: Ley Foon Tan Acked-by: Suman Anna Signed-off-by: Jassi Brar --- include/linux/mailbox_controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index d4cf96f07cfc..68c42454439b 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -72,7 +72,7 @@ struct mbox_chan_ops { */ struct mbox_controller { struct device *dev; - struct mbox_chan_ops *ops; + const struct mbox_chan_ops *ops; struct mbox_chan *chans; int num_chans; bool txdone_irq; -- cgit v1.2.3 From 3c4ed7bdf5997d8020cbb8d4abbef2fcfb9f1284 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 2 May 2015 15:10:46 -0700 Subject: LSM: Split security.h The security.h header file serves two purposes, interfaces for users of the security modules and interfaces for security modules. Users of the security modules don't need to know about what's in the security_operations structure, so pull it out into it's own header, lsm_hooks.h Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Kees Cook Acked-by: Paul Moore Acked-by: Stephen Smalley Acked-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 358 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/security.h | 305 --------------------------------------- 2 files changed, 358 insertions(+), 305 deletions(-) create mode 100644 include/linux/lsm_hooks.h (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h new file mode 100644 index 000000000000..c60f81b2d18c --- /dev/null +++ b/include/linux/lsm_hooks.h @@ -0,0 +1,358 @@ +/* + * Linux Security Module interfaces + * + * Copyright (C) 2001 WireX Communications, Inc + * Copyright (C) 2001 Greg Kroah-Hartman + * Copyright (C) 2001 Networks Associates Technology, Inc + * Copyright (C) 2001 James Morris + * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (C) 2015 Intel Corporation. + * Copyright (C) 2015 Casey Schaufler + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Due to this file being licensed under the GPL there is controversy over + * whether this permits you to write a module that #includes this file + * without placing your module under the GPL. Please consult a lawyer for + * advice before doing this. + * + */ + +#ifndef __LINUX_LSM_HOOKS_H +#define __LINUX_LSM_HOOKS_H + +#include + +/* Maximum number of letters for an LSM name string */ +#define SECURITY_NAME_MAX 10 + +#ifdef CONFIG_SECURITY + +struct security_operations { + char name[SECURITY_NAME_MAX + 1]; + + int (*binder_set_context_mgr)(struct task_struct *mgr); + int (*binder_transaction)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_binder)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_file)(struct task_struct *from, + struct task_struct *to, + struct file *file); + + int (*ptrace_access_check)(struct task_struct *child, + unsigned int mode); + int (*ptrace_traceme)(struct task_struct *parent); + int (*capget)(struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted); + int (*capset)(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); + int (*capable)(const struct cred *cred, struct user_namespace *ns, + int cap, int audit); + int (*quotactl)(int cmds, int type, int id, struct super_block *sb); + int (*quota_on)(struct dentry *dentry); + int (*syslog)(int type); + int (*settime)(const struct timespec *ts, const struct timezone *tz); + int (*vm_enough_memory)(struct mm_struct *mm, long pages); + + int (*bprm_set_creds)(struct linux_binprm *bprm); + int (*bprm_check_security)(struct linux_binprm *bprm); + int (*bprm_secureexec)(struct linux_binprm *bprm); + void (*bprm_committing_creds)(struct linux_binprm *bprm); + void (*bprm_committed_creds)(struct linux_binprm *bprm); + + int (*sb_alloc_security)(struct super_block *sb); + void (*sb_free_security)(struct super_block *sb); + int (*sb_copy_data)(char *orig, char *copy); + int (*sb_remount)(struct super_block *sb, void *data); + int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); + int (*sb_show_options)(struct seq_file *m, struct super_block *sb); + int (*sb_statfs)(struct dentry *dentry); + int (*sb_mount)(const char *dev_name, struct path *path, + const char *type, unsigned long flags, void *data); + int (*sb_umount)(struct vfsmount *mnt, int flags); + int (*sb_pivotroot)(struct path *old_path, struct path *new_path); + int (*sb_set_mnt_opts)(struct super_block *sb, + struct security_mnt_opts *opts, + unsigned long kern_flags, + unsigned long *set_kern_flags); + int (*sb_clone_mnt_opts)(const struct super_block *oldsb, + struct super_block *newsb); + int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts); + int (*dentry_init_security)(struct dentry *dentry, int mode, + struct qstr *name, void **ctx, + u32 *ctxlen); + + +#ifdef CONFIG_SECURITY_PATH + int (*path_unlink)(struct path *dir, struct dentry *dentry); + int (*path_mkdir)(struct path *dir, struct dentry *dentry, + umode_t mode); + int (*path_rmdir)(struct path *dir, struct dentry *dentry); + int (*path_mknod)(struct path *dir, struct dentry *dentry, + umode_t mode, unsigned int dev); + int (*path_truncate)(struct path *path); + int (*path_symlink)(struct path *dir, struct dentry *dentry, + const char *old_name); + int (*path_link)(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry); + int (*path_chmod)(struct path *path, umode_t mode); + int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid); + int (*path_chroot)(struct path *path); +#endif + + int (*inode_alloc_security)(struct inode *inode); + void (*inode_free_security)(struct inode *inode); + int (*inode_init_security)(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const char **name, void **value, + size_t *len); + int (*inode_create)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_link)(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); + int (*inode_unlink)(struct inode *dir, struct dentry *dentry); + int (*inode_symlink)(struct inode *dir, struct dentry *dentry, + const char *old_name); + int (*inode_mkdir)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_rmdir)(struct inode *dir, struct dentry *dentry); + int (*inode_mknod)(struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev); + int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry); + int (*inode_readlink)(struct dentry *dentry); + int (*inode_follow_link)(struct dentry *dentry, struct nameidata *nd); + int (*inode_permission)(struct inode *inode, int mask); + int (*inode_setattr)(struct dentry *dentry, struct iattr *attr); + int (*inode_getattr)(const struct path *path); + int (*inode_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + void (*inode_post_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags); + int (*inode_getxattr)(struct dentry *dentry, const char *name); + int (*inode_listxattr)(struct dentry *dentry); + int (*inode_removexattr)(struct dentry *dentry, const char *name); + int (*inode_need_killpriv)(struct dentry *dentry); + int (*inode_killpriv)(struct dentry *dentry); + int (*inode_getsecurity)(const struct inode *inode, const char *name, + void **buffer, bool alloc); + int (*inode_setsecurity)(struct inode *inode, const char *name, + const void *value, size_t size, + int flags); + int (*inode_listsecurity)(struct inode *inode, char *buffer, + size_t buffer_size); + void (*inode_getsecid)(const struct inode *inode, u32 *secid); + + int (*file_permission)(struct file *file, int mask); + int (*file_alloc_security)(struct file *file); + void (*file_free_security)(struct file *file); + int (*file_ioctl)(struct file *file, unsigned int cmd, + unsigned long arg); + int (*mmap_addr)(unsigned long addr); + int (*mmap_file)(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); + int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot, + unsigned long prot); + int (*file_lock)(struct file *file, unsigned int cmd); + int (*file_fcntl)(struct file *file, unsigned int cmd, + unsigned long arg); + void (*file_set_fowner)(struct file *file); + int (*file_send_sigiotask)(struct task_struct *tsk, + struct fown_struct *fown, int sig); + int (*file_receive)(struct file *file); + int (*file_open)(struct file *file, const struct cred *cred); + + int (*task_create)(unsigned long clone_flags); + void (*task_free)(struct task_struct *task); + int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); + void (*cred_free)(struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_transfer)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); + int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); + int (*task_fix_setuid)(struct cred *new, const struct cred *old, + int flags); + int (*task_setpgid)(struct task_struct *p, pid_t pgid); + int (*task_getpgid)(struct task_struct *p); + int (*task_getsid)(struct task_struct *p); + void (*task_getsecid)(struct task_struct *p, u32 *secid); + int (*task_setnice)(struct task_struct *p, int nice); + int (*task_setioprio)(struct task_struct *p, int ioprio); + int (*task_getioprio)(struct task_struct *p); + int (*task_setrlimit)(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); + int (*task_setscheduler)(struct task_struct *p); + int (*task_getscheduler)(struct task_struct *p); + int (*task_movememory)(struct task_struct *p); + int (*task_kill)(struct task_struct *p, struct siginfo *info, + int sig, u32 secid); + int (*task_wait)(struct task_struct *p); + int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); + void (*task_to_inode)(struct task_struct *p, struct inode *inode); + + int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag); + void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid); + + int (*msg_msg_alloc_security)(struct msg_msg *msg); + void (*msg_msg_free_security)(struct msg_msg *msg); + + int (*msg_queue_alloc_security)(struct msg_queue *msq); + void (*msg_queue_free_security)(struct msg_queue *msq); + int (*msg_queue_associate)(struct msg_queue *msq, int msqflg); + int (*msg_queue_msgctl)(struct msg_queue *msq, int cmd); + int (*msg_queue_msgsnd)(struct msg_queue *msq, struct msg_msg *msg, + int msqflg); + int (*msg_queue_msgrcv)(struct msg_queue *msq, struct msg_msg *msg, + struct task_struct *target, long type, + int mode); + + int (*shm_alloc_security)(struct shmid_kernel *shp); + void (*shm_free_security)(struct shmid_kernel *shp); + int (*shm_associate)(struct shmid_kernel *shp, int shmflg); + int (*shm_shmctl)(struct shmid_kernel *shp, int cmd); + int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr, + int shmflg); + + int (*sem_alloc_security)(struct sem_array *sma); + void (*sem_free_security)(struct sem_array *sma); + int (*sem_associate)(struct sem_array *sma, int semflg); + int (*sem_semctl)(struct sem_array *sma, int cmd); + int (*sem_semop)(struct sem_array *sma, struct sembuf *sops, + unsigned nsops, int alter); + + int (*netlink_send)(struct sock *sk, struct sk_buff *skb); + + void (*d_instantiate)(struct dentry *dentry, struct inode *inode); + + int (*getprocattr)(struct task_struct *p, char *name, char **value); + int (*setprocattr)(struct task_struct *p, char *name, void *value, + size_t size); + int (*ismaclabel)(const char *name); + int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); + int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); + void (*release_secctx)(char *secdata, u32 seclen); + + int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); + int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); + int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); + +#ifdef CONFIG_SECURITY_NETWORK + int (*unix_stream_connect)(struct sock *sock, struct sock *other, + struct sock *newsk); + int (*unix_may_send)(struct socket *sock, struct socket *other); + + int (*socket_create)(int family, int type, int protocol, int kern); + int (*socket_post_create)(struct socket *sock, int family, int type, + int protocol, int kern); + int (*socket_bind)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_connect)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_listen)(struct socket *sock, int backlog); + int (*socket_accept)(struct socket *sock, struct socket *newsock); + int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg, + int size); + int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg, + int size, int flags); + int (*socket_getsockname)(struct socket *sock); + int (*socket_getpeername)(struct socket *sock); + int (*socket_getsockopt)(struct socket *sock, int level, int optname); + int (*socket_setsockopt)(struct socket *sock, int level, int optname); + int (*socket_shutdown)(struct socket *sock, int how); + int (*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb); + int (*socket_getpeersec_stream)(struct socket *sock, + char __user *optval, + int __user *optlen, unsigned len); + int (*socket_getpeersec_dgram)(struct socket *sock, + struct sk_buff *skb, u32 *secid); + int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority); + void (*sk_free_security)(struct sock *sk); + void (*sk_clone_security)(const struct sock *sk, struct sock *newsk); + void (*sk_getsecid)(struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock *sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, + const struct request_sock *req); + void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb); + int (*secmark_relabel_packet)(u32 secid); + void (*secmark_refcount_inc)(void); + void (*secmark_refcount_dec)(void); + void (*req_classify_flow)(const struct request_sock *req, + struct flowi *fl); + int (*tun_dev_alloc_security)(void **security); + void (*tun_dev_free_security)(void *security); + int (*tun_dev_create)(void); + int (*tun_dev_attach_queue)(void *security); + int (*tun_dev_attach)(struct sock *sk, void *security); + int (*tun_dev_open)(void *security); +#endif /* CONFIG_SECURITY_NETWORK */ + +#ifdef CONFIG_SECURITY_NETWORK_XFRM + int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp); + int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctx); + void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_state_alloc)(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_acquire)(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid); + void (*xfrm_state_free_security)(struct xfrm_state *x); + int (*xfrm_state_delete_security)(struct xfrm_state *x); + int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, + u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, + const struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + + /* key management security hooks */ +#ifdef CONFIG_KEYS + int (*key_alloc)(struct key *key, const struct cred *cred, + unsigned long flags); + void (*key_free)(struct key *key); + int (*key_permission)(key_ref_t key_ref, const struct cred *cred, + unsigned perm); + int (*key_getsecurity)(struct key *key, char **_buffer); +#endif /* CONFIG_KEYS */ + +#ifdef CONFIG_AUDIT + int (*audit_rule_init)(u32 field, u32 op, char *rulestr, + void **lsmrule); + int (*audit_rule_known)(struct audit_krule *krule); + int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule, + struct audit_context *actx); + void (*audit_rule_free)(void *lsmrule); +#endif /* CONFIG_AUDIT */ +}; + +/* prototypes */ +extern int security_module_enable(struct security_operations *ops); +extern int register_security(struct security_operations *ops); +extern void __init security_fixup_ops(struct security_operations *ops); +extern void reset_security_ops(void); + +#endif /* CONFIG_SECURITY */ + +#endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 18264ea9e314..f3d42c636f27 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -116,8 +116,6 @@ struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); -void reset_security_ops(void); - #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; @@ -1457,312 +1455,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @ctxlen points to the place to put the length of @ctx. * This is the main security structure. */ -struct security_operations { - char name[SECURITY_NAME_MAX + 1]; - - int (*binder_set_context_mgr) (struct task_struct *mgr); - int (*binder_transaction) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file) (struct task_struct *from, - struct task_struct *to, struct file *file); - - int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); - int (*ptrace_traceme) (struct task_struct *parent); - int (*capget) (struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset) (struct cred *new, - const struct cred *old, - const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - int (*capable) (const struct cred *cred, struct user_namespace *ns, - int cap, int audit); - int (*quotactl) (int cmds, int type, int id, struct super_block *sb); - int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type); - int (*settime) (const struct timespec *ts, const struct timezone *tz); - int (*vm_enough_memory) (struct mm_struct *mm, long pages); - - int (*bprm_set_creds) (struct linux_binprm *bprm); - int (*bprm_check_security) (struct linux_binprm *bprm); - int (*bprm_secureexec) (struct linux_binprm *bprm); - void (*bprm_committing_creds) (struct linux_binprm *bprm); - void (*bprm_committed_creds) (struct linux_binprm *bprm); - - int (*sb_alloc_security) (struct super_block *sb); - void (*sb_free_security) (struct super_block *sb); - int (*sb_copy_data) (char *orig, char *copy); - int (*sb_remount) (struct super_block *sb, void *data); - int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); - int (*sb_show_options) (struct seq_file *m, struct super_block *sb); - int (*sb_statfs) (struct dentry *dentry); - int (*sb_mount) (const char *dev_name, struct path *path, - const char *type, unsigned long flags, void *data); - int (*sb_umount) (struct vfsmount *mnt, int flags); - int (*sb_pivotroot) (struct path *old_path, - struct path *new_path); - int (*sb_set_mnt_opts) (struct super_block *sb, - struct security_mnt_opts *opts, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_clone_mnt_opts) (const struct super_block *oldsb, - struct super_block *newsb); - int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); - int (*dentry_init_security) (struct dentry *dentry, int mode, - struct qstr *name, void **ctx, - u32 *ctxlen); - - -#ifdef CONFIG_SECURITY_PATH - int (*path_unlink) (struct path *dir, struct dentry *dentry); - int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); - int (*path_rmdir) (struct path *dir, struct dentry *dentry); - int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode, - unsigned int dev); - int (*path_truncate) (struct path *path); - int (*path_symlink) (struct path *dir, struct dentry *dentry, - const char *old_name); - int (*path_link) (struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry); - int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod) (struct path *path, umode_t mode); - int (*path_chown) (struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot) (struct path *path); -#endif - - int (*inode_alloc_security) (struct inode *inode); - void (*inode_free_security) (struct inode *inode); - int (*inode_init_security) (struct inode *inode, struct inode *dir, - const struct qstr *qstr, const char **name, - void **value, size_t *len); - int (*inode_create) (struct inode *dir, - struct dentry *dentry, umode_t mode); - int (*inode_link) (struct dentry *old_dentry, - struct inode *dir, struct dentry *new_dentry); - int (*inode_unlink) (struct inode *dir, struct dentry *dentry); - int (*inode_symlink) (struct inode *dir, - struct dentry *dentry, const char *old_name); - int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode); - int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); - int (*inode_mknod) (struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev); - int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry); - int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask); - int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (const struct path *path); - int (*inode_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - void (*inode_post_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - int (*inode_getxattr) (struct dentry *dentry, const char *name); - int (*inode_listxattr) (struct dentry *dentry); - int (*inode_removexattr) (struct dentry *dentry, const char *name); - int (*inode_need_killpriv) (struct dentry *dentry); - int (*inode_killpriv) (struct dentry *dentry); - int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); - int (*inode_setsecurity) (struct inode *inode, const char *name, const void *value, size_t size, int flags); - int (*inode_listsecurity) (struct inode *inode, char *buffer, size_t buffer_size); - void (*inode_getsecid) (const struct inode *inode, u32 *secid); - - int (*file_permission) (struct file *file, int mask); - int (*file_alloc_security) (struct file *file); - void (*file_free_security) (struct file *file); - int (*file_ioctl) (struct file *file, unsigned int cmd, - unsigned long arg); - int (*mmap_addr) (unsigned long addr); - int (*mmap_file) (struct file *file, - unsigned long reqprot, unsigned long prot, - unsigned long flags); - int (*file_mprotect) (struct vm_area_struct *vma, - unsigned long reqprot, - unsigned long prot); - int (*file_lock) (struct file *file, unsigned int cmd); - int (*file_fcntl) (struct file *file, unsigned int cmd, - unsigned long arg); - void (*file_set_fowner) (struct file *file); - int (*file_send_sigiotask) (struct task_struct *tsk, - struct fown_struct *fown, int sig); - int (*file_receive) (struct file *file); - int (*file_open) (struct file *file, const struct cred *cred); - - int (*task_create) (unsigned long clone_flags); - void (*task_free) (struct task_struct *task); - int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp); - void (*cred_free) (struct cred *cred); - int (*cred_prepare)(struct cred *new, const struct cred *old, - gfp_t gfp); - void (*cred_transfer)(struct cred *new, const struct cred *old); - int (*kernel_act_as)(struct cred *new, u32 secid); - int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); - int (*kernel_module_request)(char *kmod_name); - int (*kernel_module_from_file)(struct file *file); - int (*task_fix_setuid) (struct cred *new, const struct cred *old, - int flags); - int (*task_setpgid) (struct task_struct *p, pid_t pgid); - int (*task_getpgid) (struct task_struct *p); - int (*task_getsid) (struct task_struct *p); - void (*task_getsecid) (struct task_struct *p, u32 *secid); - int (*task_setnice) (struct task_struct *p, int nice); - int (*task_setioprio) (struct task_struct *p, int ioprio); - int (*task_getioprio) (struct task_struct *p); - int (*task_setrlimit) (struct task_struct *p, unsigned int resource, - struct rlimit *new_rlim); - int (*task_setscheduler) (struct task_struct *p); - int (*task_getscheduler) (struct task_struct *p); - int (*task_movememory) (struct task_struct *p); - int (*task_kill) (struct task_struct *p, - struct siginfo *info, int sig, u32 secid); - int (*task_wait) (struct task_struct *p); - int (*task_prctl) (int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); - void (*task_to_inode) (struct task_struct *p, struct inode *inode); - - int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); - void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid); - - int (*msg_msg_alloc_security) (struct msg_msg *msg); - void (*msg_msg_free_security) (struct msg_msg *msg); - - int (*msg_queue_alloc_security) (struct msg_queue *msq); - void (*msg_queue_free_security) (struct msg_queue *msq); - int (*msg_queue_associate) (struct msg_queue *msq, int msqflg); - int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd); - int (*msg_queue_msgsnd) (struct msg_queue *msq, - struct msg_msg *msg, int msqflg); - int (*msg_queue_msgrcv) (struct msg_queue *msq, - struct msg_msg *msg, - struct task_struct *target, - long type, int mode); - - int (*shm_alloc_security) (struct shmid_kernel *shp); - void (*shm_free_security) (struct shmid_kernel *shp); - int (*shm_associate) (struct shmid_kernel *shp, int shmflg); - int (*shm_shmctl) (struct shmid_kernel *shp, int cmd); - int (*shm_shmat) (struct shmid_kernel *shp, - char __user *shmaddr, int shmflg); - - int (*sem_alloc_security) (struct sem_array *sma); - void (*sem_free_security) (struct sem_array *sma); - int (*sem_associate) (struct sem_array *sma, int semflg); - int (*sem_semctl) (struct sem_array *sma, int cmd); - int (*sem_semop) (struct sem_array *sma, - struct sembuf *sops, unsigned nsops, int alter); - - int (*netlink_send) (struct sock *sk, struct sk_buff *skb); - - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); - - int (*getprocattr) (struct task_struct *p, char *name, char **value); - int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size); - int (*ismaclabel) (const char *name); - int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid); - void (*release_secctx) (char *secdata, u32 seclen); - - int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); - int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); - int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); - -#ifdef CONFIG_SECURITY_NETWORK - int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk); - int (*unix_may_send) (struct socket *sock, struct socket *other); - - int (*socket_create) (int family, int type, int protocol, int kern); - int (*socket_post_create) (struct socket *sock, int family, - int type, int protocol, int kern); - int (*socket_bind) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_connect) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_listen) (struct socket *sock, int backlog); - int (*socket_accept) (struct socket *sock, struct socket *newsock); - int (*socket_sendmsg) (struct socket *sock, - struct msghdr *msg, int size); - int (*socket_recvmsg) (struct socket *sock, - struct msghdr *msg, int size, int flags); - int (*socket_getsockname) (struct socket *sock); - int (*socket_getpeername) (struct socket *sock); - int (*socket_getsockopt) (struct socket *sock, int level, int optname); - int (*socket_setsockopt) (struct socket *sock, int level, int optname); - int (*socket_shutdown) (struct socket *sock, int how); - int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb); - int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); - int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); - void (*sk_free_security) (struct sock *sk); - void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - void (*sk_getsecid) (struct sock *sk, u32 *secid); - void (*sock_graft) (struct sock *sk, struct socket *parent); - int (*inet_conn_request) (struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); - void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); - int (*secmark_relabel_packet) (u32 secid); - void (*secmark_refcount_inc) (void); - void (*secmark_refcount_dec) (void); - void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); - int (*tun_dev_alloc_security) (void **security); - void (*tun_dev_free_security) (void *security); - int (*tun_dev_create) (void); - int (*tun_dev_attach_queue) (void *security); - int (*tun_dev_attach) (struct sock *sk, void *security); - int (*tun_dev_open) (void *security); -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); - int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); - void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_state_alloc_acquire) (struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid); - void (*xfrm_state_free_security) (struct xfrm_state *x); - int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); - int (*xfrm_state_pol_flow_match) (struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl); - int (*xfrm_decode_session) (struct sk_buff *skb, u32 *secid, int ckall); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ - - /* key management security hooks */ -#ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); - void (*key_free) (struct key *key); - int (*key_permission) (key_ref_t key_ref, - const struct cred *cred, - unsigned perm); - int (*key_getsecurity)(struct key *key, char **_buffer); -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT - int (*audit_rule_init) (u32 field, u32 op, char *rulestr, void **lsmrule); - int (*audit_rule_known) (struct audit_krule *krule); - int (*audit_rule_match) (u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); - void (*audit_rule_free) (void *lsmrule); -#endif /* CONFIG_AUDIT */ -}; /* prototypes */ extern int security_init(void); -extern int security_module_enable(struct security_operations *ops); -extern int register_security(struct security_operations *ops); -extern void __init security_fixup_ops(struct security_operations *ops); - /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr); -- cgit v1.2.3 From fe7bb272ee72b5cc377e02b556d0d718d12bbede Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 2 May 2015 15:10:53 -0700 Subject: LSM: Add the comment to lsm_hooks.h Add the large comment describing the content of the security_operations structure to lsm_hooks.h. This wasn't done in the previous (1/7) patch because it would have exceeded the mail list size limits. Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Kees Cook Acked-by: Paul Moore Acked-by: Stephen Smalley Acked-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 1279 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1279 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c60f81b2d18c..b4c91de510c2 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -31,6 +31,1285 @@ #ifdef CONFIG_SECURITY +/** + * struct security_operations - main security structure + * + * Security module identifier. + * + * @name: + * A string that acts as a unique identifier for the LSM with max number + * of characters = SECURITY_NAME_MAX. + * + * Security hooks for program execution operations. + * + * @bprm_set_creds: + * Save security information in the bprm->security field, typically based + * on information about the bprm->file, for later use by the apply_creds + * hook. This hook may also optionally check permissions (e.g. for + * transitions between security domains). + * This hook may be called multiple times during a single execve, e.g. for + * interpreters. The hook can tell whether it has already been called by + * checking to see if @bprm->security is non-NULL. If so, then the hook + * may decide either to retain the security information saved earlier or + * to replace it. + * @bprm contains the linux_binprm structure. + * Return 0 if the hook is successful and permission is granted. + * @bprm_check_security: + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check the @bprm->security value which is set in the + * preceding set_creds call. The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. + * @bprm contains the linux_binprm structure. + * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). + * @bprm_secureexec: + * Return a boolean value (0 or 1) indicating whether a "secure exec" + * is required. The flag is passed in the auxiliary table + * on the initial stack to the ELF interpreter to indicate whether libc + * should enable secure mode. + * @bprm contains the linux_binprm structure. + * + * Security hooks for filesystem operations. + * + * @sb_alloc_security: + * Allocate and attach a security structure to the sb->s_security field. + * The s_security field is initialized to NULL when the structure is + * allocated. + * @sb contains the super_block structure to be modified. + * Return 0 if operation was successful. + * @sb_free_security: + * Deallocate and clear the sb->s_security field. + * @sb contains the super_block structure to be modified. + * @sb_statfs: + * Check permission before obtaining filesystem statistics for the @mnt + * mountpoint. + * @dentry is a handle on the superblock for the filesystem. + * Return 0 if permission is granted. + * @sb_mount: + * Check permission before an object specified by @dev_name is mounted on + * the mount point named by @nd. For an ordinary mount, @dev_name + * identifies a device if the file system type requires a device. For a + * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a + * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the + * pathname of the object being mounted. + * @dev_name contains the name for object being mounted. + * @path contains the path for mount point object. + * @type contains the filesystem type. + * @flags contains the mount flags. + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_copy_data: + * Allow mount option data to be copied prior to parsing by the filesystem, + * so that the security module can extract security-specific mount + * options cleanly (a filesystem may modify the data e.g. with strsep()). + * This also allows the original mount data to be stripped of security- + * specific options to avoid having to make filesystems aware of them. + * @type the type of filesystem being mounted. + * @orig the original mount data copied from userspace. + * @copy copied data which will be passed to the security module. + * Returns 0 if the copy was successful. + * @sb_remount: + * Extracts security system specific mount options and verifies no changes + * are being made to those options. + * @sb superblock being remounted + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_umount: + * Check permission before the @mnt file system is unmounted. + * @mnt contains the mounted file system. + * @flags contains the unmount flags, e.g. MNT_FORCE. + * Return 0 if permission is granted. + * @sb_pivotroot: + * Check permission before pivoting the root filesystem. + * @old_path contains the path for the new location of the + * current root (put_old). + * @new_path contains the path for the new root (new_root). + * Return 0 if permission is granted. + * @sb_set_mnt_opts: + * Set the security relevant mount options used for a superblock + * @sb the superblock to set security mount options for + * @opts binary data structure containing all lsm mount data + * @sb_clone_mnt_opts: + * Copy all security options from a given superblock to another + * @oldsb old superblock which contain information to clone + * @newsb new superblock which needs filled in + * @sb_parse_opts_str: + * Parse a string of security data filling in the opts structure + * @options string containing all mount options known by the LSM + * @opts binary data structure usable by the LSM + * @dentry_init_security: + * Compute a context for a dentry as the inode is not yet available + * since NFSv4 has no label backed by an EA anyway. + * @dentry dentry to use in calculating the context. + * @mode mode used to determine resource type. + * @name name of the last path component used to create file + * @ctx pointer to place the pointer to the resulting context in. + * @ctxlen point to place the length of the resulting context. + * + * + * Security hooks for inode operations. + * + * @inode_alloc_security: + * Allocate and attach a security structure to @inode->i_security. The + * i_security field is initialized to NULL when the inode structure is + * allocated. + * @inode contains the inode structure. + * Return 0 if operation was successful. + * @inode_free_security: + * @inode contains the inode structure. + * Deallocate the inode security structure and set @inode->i_security to + * NULL. + * @inode_init_security: + * Obtain the security attribute name suffix and value to set on a newly + * created inode and set up the incore security field for the new inode. + * This hook is called by the fs code as part of the inode creation + * transaction and provides for atomic labeling of the inode, unlike + * the post_create/mkdir/... hooks called by the VFS. The hook function + * is expected to allocate the name and value via kmalloc, with the caller + * being responsible for calling kfree after using them. + * If the security module does not use security attributes or does + * not wish to put a security attribute on this particular inode, + * then it should return -EOPNOTSUPP to skip this processing. + * @inode contains the inode structure of the newly created inode. + * @dir contains the inode structure of the parent directory. + * @qstr contains the last path component of the new object + * @name will be set to the allocated name suffix (e.g. selinux). + * @value will be set to the allocated attribute value. + * @len will be set to the length of the value. + * Returns 0 if @name and @value have been successfully set, + * -EOPNOTSUPP if no security attribute is needed, or + * -ENOMEM on memory allocation failure. + * @inode_create: + * Check permission to create a regular file. + * @dir contains inode structure of the parent of the new file. + * @dentry contains the dentry structure for the file to be created. + * @mode contains the file mode of the file to be created. + * Return 0 if permission is granted. + * @inode_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing + * link to the file. + * @dir contains the inode structure of the parent directory + * of the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @path_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link + * to the file. + * @new_dir contains the path structure of the parent directory of + * the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @inode_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the inode structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @path_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the path structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @inode_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the inode structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @path_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the path structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @inode_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with inode structure @dir. + * @dir contains the inode structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @path_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with path structure @path. + * @dir contains the path structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @inode_rmdir: + * Check the permission to remove a directory. + * @dir contains the inode structure of parent of the directory + * to be removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @path_rmdir: + * Check the permission to remove a directory. + * @dir contains the path structure of parent of the directory to be + * removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @inode_mknod: + * Check permissions when creating a special file (or a socket or a fifo + * file created via the mknod system call). Note that if mknod operation + * is being done for a regular file, then the create hook will be called + * and not this hook. + * @dir contains the inode structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the device number. + * Return 0 if permission is granted. + * @path_mknod: + * Check permissions when creating a file. Note that this hook is called + * even if mknod operation is being done for a regular file. + * @dir contains the path structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the undecoded device number. Use new_decode_dev() to get + * the decoded device number. + * Return 0 if permission is granted. + * @inode_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the inode structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the inode structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the path structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the path structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. + * @inode_readlink: + * Check the permission to read the symbolic link. + * @dentry contains the dentry structure for the file link. + * Return 0 if permission is granted. + * @inode_follow_link: + * Check permission to follow a symbolic link when looking up a pathname. + * @dentry contains the dentry structure for the link. + * @nd contains the nameidata structure for the parent directory. + * Return 0 if permission is granted. + * @inode_permission: + * Check permission before accessing an inode. This hook is called by the + * existing Linux permission function, so a security module can use it to + * provide additional checking for existing Linux permission checks. + * Notice that this hook is called when a file is opened (as well as many + * other operations), whereas the file_security_ops permission hook is + * called when the actual read/write operations are performed. + * @inode contains the inode structure to check. + * @mask contains the permission mask. + * Return 0 if permission is granted. + * @inode_setattr: + * Check permission before setting file attributes. Note that the kernel + * call to notify_change is performed from several locations, whenever + * file attributes change (such as when a file is truncated, chown/chmod + * operations, transferring disk quotas, etc). + * @dentry contains the dentry structure for the file. + * @attr is the iattr structure containing the new file attributes. + * Return 0 if permission is granted. + * @path_truncate: + * Check permission before truncating a file. + * @path contains the path structure for the file. + * Return 0 if permission is granted. + * @inode_getattr: + * Check permission before obtaining file attributes. + * @mnt is the vfsmount where the dentry was looked up + * @dentry contains the dentry structure for the file. + * Return 0 if permission is granted. + * @inode_setxattr: + * Check permission before setting the extended attributes + * @value identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_post_setxattr: + * Update inode security field after successful setxattr operation. + * @value identified by @name for @dentry. + * @inode_getxattr: + * Check permission before obtaining the extended attributes + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_listxattr: + * Check permission before obtaining the list of extended attribute + * names for @dentry. + * Return 0 if permission is granted. + * @inode_removexattr: + * Check permission before removing the extended attribute + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_getsecurity: + * Retrieve a copy of the extended attribute representation of the + * security label associated with @name for @inode via @buffer. Note that + * @name is the remainder of the attribute name after the security prefix + * has been removed. @alloc is used to specify of the call should return a + * value via the buffer or just the value length Return size of buffer on + * success. + * @inode_setsecurity: + * Set the security label associated with @name for @inode from the + * extended attribute value @value. @size indicates the size of the + * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. + * Note that @name is the remainder of the attribute name after the + * security. prefix has been removed. + * Return 0 on success. + * @inode_listsecurity: + * Copy the extended attribute names for the security labels + * associated with @inode into @buffer. The maximum size of @buffer + * is specified by @buffer_size. @buffer may be NULL to request + * the size of the buffer required. + * Returns number of bytes used/required on success. + * @inode_need_killpriv: + * Called when an inode has been changed. + * @dentry is the dentry being changed. + * Return <0 on error to abort the inode change operation. + * Return 0 if inode_killpriv does not need to be called. + * Return >0 if inode_killpriv does need to be called. + * @inode_killpriv: + * The setuid bit is being removed. Remove similar security labels. + * Called with the dentry->d_inode->i_mutex held. + * @dentry is the dentry being changed. + * Return 0 on success. If error is returned, then the operation + * causing setuid bit removal is failed. + * @inode_getsecid: + * Get the secid associated with the node. + * @inode contains a pointer to the inode. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for file operations + * + * @file_permission: + * Check file permissions before accessing an open file. This hook is + * called by various operations that read or write files. A security + * module can use this hook to perform additional checking on these + * operations, e.g. to revalidate permissions on use to support privilege + * bracketing or policy changes. Notice that this hook is used when the + * actual read/write operations are performed, whereas the + * inode_security_ops hook is called when a file is opened (as well as + * many other operations). + * Caveat: Although this hook can be used to revalidate permissions for + * various system call operations that read or write files, it does not + * address the revalidation of permissions for memory-mapped files. + * Security modules must handle this separately if they need such + * revalidation. + * @file contains the file structure being accessed. + * @mask contains the requested permissions. + * Return 0 if permission is granted. + * @file_alloc_security: + * Allocate and attach a security structure to the file->f_security field. + * The security field is initialized to NULL when the structure is first + * created. + * @file contains the file structure to secure. + * Return 0 if the hook is successful and permission is granted. + * @file_free_security: + * Deallocate and free any security structures stored in file->f_security. + * @file contains the file structure being modified. + * @file_ioctl: + * @file contains the file structure. + * @cmd contains the operation to perform. + * @arg contains the operational arguments. + * Check permission for an ioctl operation on @file. Note that @arg + * sometimes represents a user space pointer; in other cases, it may be a + * simple integer value. When @arg represents a user space pointer, it + * should never be used by the security module. + * Return 0 if permission is granted. + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : + * Check permissions for a mmap operation. The @file may be NULL, e.g. + * if mapping anonymous memory. + * @file contains the file structure for file to map (may be NULL). + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @file_mprotect: + * Check permissions before changing memory access permissions. + * @vma contains the memory region to modify. + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * Return 0 if permission is granted. + * @file_lock: + * Check permission before performing file locking operations. + * Note: this hook mediates both flock and fcntl style locks. + * @file contains the file structure. + * @cmd contains the posix-translated lock operation to perform + * (e.g. F_RDLCK, F_WRLCK). + * Return 0 if permission is granted. + * @file_fcntl: + * Check permission before allowing the file operation specified by @cmd + * from being performed on the file @file. Note that @arg sometimes + * represents a user space pointer; in other cases, it may be a simple + * integer value. When @arg represents a user space pointer, it should + * never be used by the security module. + * @file contains the file structure. + * @cmd contains the operation to be performed. + * @arg contains the operational arguments. + * Return 0 if permission is granted. + * @file_set_fowner: + * Save owner security information (typically from current->security) in + * file->f_security for later use by the send_sigiotask hook. + * @file contains the file structure to update. + * Return 0 on success. + * @file_send_sigiotask: + * Check permission for the file owner @fown to send SIGIO or SIGURG to the + * process @tsk. Note that this hook is sometimes called from interrupt. + * Note that the fown_struct, @fown, is never outside the context of a + * struct file, so the file structure (and associated security information) + * can always be obtained: + * container_of(fown, struct file, f_owner) + * @tsk contains the structure of task receiving signal. + * @fown contains the file owner information. + * @sig is the signal that will be sent. When 0, kernel sends SIGIO. + * Return 0 if permission is granted. + * @file_receive: + * This hook allows security modules to control the ability of a process + * to receive an open file descriptor via socket IPC. + * @file contains the file structure being received. + * Return 0 if permission is granted. + * @file_open + * Save open-time permission checking state for later use upon + * file_permission, and recheck access if anything has changed + * since inode_permission. + * + * Security hooks for task operations. + * + * @task_create: + * Check permission before creating a child process. See the clone(2) + * manual page for definitions of the @clone_flags. + * @clone_flags contains the flags indicating what should be shared. + * Return 0 if permission is granted. + * @task_free: + * @task task being freed + * Handle release of task-related resources. (Note that this can be called + * from interrupt context.) + * @cred_alloc_blank: + * @cred points to the credentials. + * @gfp indicates the atomicity of any memory allocations. + * Only allocate sufficient memory and attach to @cred such that + * cred_transfer() will not get ENOMEM. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_transfer: + * @new points to the new credentials. + * @old points to the original credentials. + * Transfer data from original creds to new creds + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. + * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. + * @kernel_fw_from_file: + * Load firmware from userspace (not called for built-in firmware). + * @file contains the file structure pointing to the file containing + * the firmware to load. This argument will be NULL if the firmware + * was loaded via the uevent-triggered blob-based interface exposed + * by CONFIG_FW_LOADER_USER_HELPER. + * @buf pointer to buffer containing firmware contents. + * @size length of the firmware contents. + * Return 0 if permission is granted. + * @kernel_module_request: + * Ability to trigger the kernel to automatically upcall to userspace for + * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel + * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. + * @task_fix_setuid: + * Update the module's state after setting one or more of the user + * identity attributes of the current process. The @flags parameter + * indicates which of the set*uid system calls invoked this hook. If + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaces + * @flags contains one of the LSM_SETID_* values. + * Return 0 on success. + * @task_setpgid: + * Check permission before setting the process group identifier of the + * process @p to @pgid. + * @p contains the task_struct for process being modified. + * @pgid contains the new pgid. + * Return 0 if permission is granted. + * @task_getpgid: + * Check permission before getting the process group identifier of the + * process @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsid: + * Check permission before getting the session identifier of the process + * @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsecid: + * Retrieve the security identifier of the process @p. + * @p contains the task_struct for the process and place is into @secid. + * In case of failure, @secid will be set to zero. + * + * @task_setnice: + * Check permission before setting the nice value of @p to @nice. + * @p contains the task_struct of process. + * @nice contains the new nice value. + * Return 0 if permission is granted. + * @task_setioprio + * Check permission before setting the ioprio value of @p to @ioprio. + * @p contains the task_struct of process. + * @ioprio contains the new ioprio value + * Return 0 if permission is granted. + * @task_getioprio + * Check permission before getting the ioprio value of @p. + * @p contains the task_struct of process. + * Return 0 if permission is granted. + * @task_setrlimit: + * Check permission before setting the resource limits of the current + * process for @resource to @new_rlim. The old resource limit values can + * be examined by dereferencing (current->signal->rlim + resource). + * @resource contains the resource whose limit is being set. + * @new_rlim contains the new limits for @resource. + * Return 0 if permission is granted. + * @task_setscheduler: + * Check permission before setting scheduling policy and/or parameters of + * process @p based on @policy and @lp. + * @p contains the task_struct for process. + * @policy contains the scheduling policy. + * @lp contains the scheduling parameters. + * Return 0 if permission is granted. + * @task_getscheduler: + * Check permission before obtaining scheduling information for process + * @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_movememory + * Check permission before moving memory owned by process @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_kill: + * Check permission before sending signal @sig to @p. @info can be NULL, + * the constant 1, or a pointer to a siginfo structure. If @info is 1 or + * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming + * from the kernel and should typically be permitted. + * SIGIO signals are handled separately by the send_sigiotask hook in + * file_security_ops. + * @p contains the task_struct for process. + * @info contains the signal information. + * @sig contains the signal value. + * @secid contains the sid of the process where the signal originated + * Return 0 if permission is granted. + * @task_wait: + * Check permission before allowing a process to reap a child process @p + * and collect its status information. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_prctl: + * Check permission before performing a process control operation on the + * current process. + * @option contains the operation. + * @arg2 contains a argument. + * @arg3 contains a argument. + * @arg4 contains a argument. + * @arg5 contains a argument. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value. + * @task_to_inode: + * Set the security attributes for an inode based on an associated task's + * security attributes, e.g. for /proc/pid inodes. + * @p contains the task_struct for the task. + * @inode contains the inode structure for the inode. + * + * Security hooks for Netlink messaging. + * + * @netlink_send: + * Save security information for a netlink message so that permission + * checking can be performed when the message is processed. The security + * information can be saved using the eff_cap field of the + * netlink_skb_parms structure. Also may be used to provide fine + * grained control over message transmission. + * @sk associated sock of task sending the message. + * @skb contains the sk_buff structure for the netlink message. + * Return 0 if the information was successfully saved and message + * is allowed to be transmitted. + * + * Security hooks for Unix domain networking. + * + * @unix_stream_connect: + * Check permissions before establishing a Unix domain stream connection + * between @sock and @other. + * @sock contains the sock structure. + * @other contains the peer sock structure. + * @newsk contains the new sock structure. + * Return 0 if permission is granted. + * @unix_may_send: + * Check permissions before connecting or sending datagrams from @sock to + * @other. + * @sock contains the socket structure. + * @other contains the peer socket structure. + * Return 0 if permission is granted. + * + * The @unix_stream_connect and @unix_may_send hooks were necessary because + * Linux provides an alternative to the conventional file name space for Unix + * domain sockets. Whereas binding and connecting to sockets in the file name + * space is mediated by the typical file permissions (and caught by the mknod + * and permission hooks in inode_security_ops), binding and connecting to + * sockets in the abstract name space is completely unmediated. Sufficient + * control of Unix domain sockets in the abstract name space isn't possible + * using only the socket layer hooks, since we need to know the actual target + * socket, which is not looked up until we are inside the af_unix code. + * + * Security hooks for socket operations. + * + * @socket_create: + * Check permissions prior to creating a new socket. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * Return 0 if permission is granted. + * @socket_post_create: + * This hook allows a module to update or allocate a per-socket security + * structure. Note that the security field was not added directly to the + * socket structure, but rather, the socket security information is stored + * in the associated inode. Typically, the inode alloc_security hook will + * allocate and and attach security information to + * sock->inode->i_security. This hook may be used to update the + * sock->inode->i_security field with additional information that wasn't + * available when the inode was allocated. + * @sock contains the newly created socket structure. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * @socket_bind: + * Check permission before socket protocol layer bind operation is + * performed and the socket @sock is bound to the address specified in the + * @address parameter. + * @sock contains the socket structure. + * @address contains the address to bind to. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_connect: + * Check permission before socket protocol layer connect operation + * attempts to connect socket @sock to a remote address, @address. + * @sock contains the socket structure. + * @address contains the address of remote endpoint. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_listen: + * Check permission before socket protocol layer listen operation. + * @sock contains the socket structure. + * @backlog contains the maximum length for the pending connection queue. + * Return 0 if permission is granted. + * @socket_accept: + * Check permission before accepting a new connection. Note that the new + * socket, @newsock, has been created and some information copied to it, + * but the accept operation has not actually been performed. + * @sock contains the listening socket structure. + * @newsock contains the newly created server socket for connection. + * Return 0 if permission is granted. + * @socket_sendmsg: + * Check permission before transmitting a message to another socket. + * @sock contains the socket structure. + * @msg contains the message to be transmitted. + * @size contains the size of message. + * Return 0 if permission is granted. + * @socket_recvmsg: + * Check permission before receiving a message from a socket. + * @sock contains the socket structure. + * @msg contains the message structure. + * @size contains the size of message structure. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @socket_getsockname: + * Check permission before the local address (name) of the socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getpeername: + * Check permission before the remote address (name) of a socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getsockopt: + * Check permissions before retrieving the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to retrieve option from. + * @optname contains the name of option to retrieve. + * Return 0 if permission is granted. + * @socket_setsockopt: + * Check permissions before setting the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to set options for. + * @optname contains the name of the option to set. + * Return 0 if permission is granted. + * @socket_shutdown: + * Checks permission before all or part of a connection on the socket + * @sock is shut down. + * @sock contains the socket structure. + * @how contains the flag indicating how future sends and receives + * are handled. + * Return 0 if permission is granted. + * @socket_sock_rcv_skb: + * Check permissions on incoming network packets. This hook is distinct + * from Netfilter's IP input hooks since it is the first time that the + * incoming sk_buff @skb has been associated with a particular socket, @sk. + * Must not sleep inside this hook because some callers hold spinlocks. + * @sk contains the sock (not socket) associated with the incoming sk_buff. + * @skb contains the incoming network data. + * @socket_getpeersec_stream: + * This hook allows the security module to provide peer socket security + * state for unix or connected tcp sockets to userspace via getsockopt + * SO_GETPEERSEC. For tcp sockets this can be meaningful if the + * socket is associated with an ipsec SA. + * @sock is the local socket. + * @optval userspace memory where the security state is to be copied. + * @optlen userspace int where the module should copy the actual length + * of the security state. + * @len as input is the maximum length to copy to userspace provided + * by the caller. + * Return 0 if all is well, otherwise, typical getsockopt return + * values. + * @socket_getpeersec_dgram: + * This hook allows the security module to provide peer socket security + * state for udp sockets on a per-packet basis to userspace via + * getsockopt SO_GETPEERSEC. The application must first have indicated + * the IP_PASSSEC option via getsockopt. It can then retrieve the + * security state returned by this hook for a packet via the SCM_SECURITY + * ancillary message type. + * @skb is the skbuff for the packet being queried + * @secdata is a pointer to a buffer in which to copy the security data + * @seclen is the maximum length for @secdata + * Return 0 on success, error on failure. + * @sk_alloc_security: + * Allocate and attach a security structure to the sk->sk_security field, + * which is used to copy security attributes between local stream sockets. + * @sk_free_security: + * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching + * of network authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken + * from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @inet_conn_established: + * Sets the connection's peersid to the secmark on skb. + * @secmark_relabel_packet: + * check if the process should be allowed to relabel packets to + * the given secid + * @security_secmark_refcount_inc + * tells the LSM to increment the number of secmark labeling rules loaded + * @security_secmark_refcount_dec + * tells the LSM to decrement the number of secmark labeling rules loaded + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. + * @tun_dev_alloc_security: + * This hook allows a module to allocate a security structure for a TUN + * device. + * @security pointer to a security structure pointer. + * Returns a zero on success, negative values on failure. + * @tun_dev_free_security: + * This hook allows a module to free the security structure for a TUN + * device. + * @security pointer to the TUN device's security structure + * @tun_dev_create: + * Check permissions prior to creating a new TUN device. + * @tun_dev_attach_queue: + * Check permissions prior to attaching to a TUN device queue. + * @security pointer to the TUN device's security structure. + * @tun_dev_attach: + * This hook can be used by the module to update any security state + * associated with the TUN device's sock structure. + * @sk contains the existing sock structure. + * @security pointer to the TUN device's security structure. + * @tun_dev_open: + * This hook can be used by the module to update any security state + * associated with the TUN device's security structure. + * @security pointer to the TUN devices's security structure. + * + * Security hooks for XFRM operations. + * + * @xfrm_policy_alloc_security: + * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy + * Database used by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level policy update program (e.g., setkey). + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. + * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation + * @xfrm_policy_clone_security: + * @old_ctx contains an existing xfrm_sec_ctx. + * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. + * Allocate a security structure in new_ctxp that contains the + * information from the old_ctx structure. + * Return 0 if operation was successful (memory to allocate). + * @xfrm_policy_free_security: + * @ctx contains the xfrm_sec_ctx + * Deallocate xp->security. + * @xfrm_policy_delete_security: + * @ctx contains the xfrm_sec_ctx. + * Authorize deletion of xp->security. + * @xfrm_state_alloc: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level SA generation program (e.g., setkey or racoon). + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to sec_ctx. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_alloc_acquire: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @polsec contains the policy's security context. + * @secid contains the secid from which to take the mls portion of the + * context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to secid. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_free_security: + * @x contains the xfrm_state. + * Deallocate x->security. + * @xfrm_state_delete_security: + * @x contains the xfrm_state. + * Authorize deletion of x->security. + * @xfrm_policy_lookup: + * @ctx contains the xfrm_sec_ctx for which the access control is being + * checked. + * @fl_secid contains the flow security label that is used to authorize + * access to the policy xp. + * @dir contains the direction of the flow (input or output). + * Check permission when a flow selects a xfrm_policy for processing + * XFRMs on a packet. The hook is called when selecting either a + * per-socket policy or a generic xfrm policy. + * Return 0 if permission is granted, -ESRCH otherwise, or -errno + * on other errors. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. + * + * Security hooks affecting all Key Management operations + * + * @key_alloc: + * Permit allocation of a key and assign security data. Note that key does + * not have a serial number assigned at this point. + * @key points to the key. + * @flags is the allocation flags + * Return 0 if permission is granted, -ve error otherwise. + * @key_free: + * Notification of destruction; free security data. + * @key points to the key. + * No return value. + * @key_permission: + * See whether a specific operational right is granted to a process on a + * key. + * @key_ref refers to the key (key pointer + possession attribute bit). + * @cred points to the credentials to provide the context against which to + * evaluate the security data on the key. + * @perm describes the combination of permissions required of this key. + * Return 0 if permission is granted, -ve error otherwise. + * @key_getsecurity: + * Get a textual representation of the security context attached to a key + * for the purposes of honouring KEYCTL_GETSECURITY. This function + * allocates the storage for the NUL-terminated string and the caller + * should free it. + * @key points to the key to be queried. + * @_buffer points to a pointer that should be set to point to the + * resulting string (if no label or an error occurs). + * Return the length of the string (including terminating NUL) or -ve if + * an error. + * May also return 0 (and a NULL buffer pointer) if there is no label. + * + * Security hooks affecting all System V IPC operations. + * + * @ipc_permission: + * Check permissions for access to IPC + * @ipcp contains the kernel IPC permission structure + * @flag contains the desired (requested) permission set + * Return 0 if permission is granted. + * @ipc_getsecid: + * Get the secid associated with the ipc object. + * @ipcp contains the kernel IPC permission structure. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for individual messages held in System V IPC message queues + * @msg_msg_alloc_security: + * Allocate and attach a security structure to the msg->security field. + * The security field is initialized to NULL when the structure is first + * created. + * @msg contains the message structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_msg_free_security: + * Deallocate the security structure for this message. + * @msg contains the message structure to be modified. + * + * Security hooks for System V IPC Message Queues + * + * @msg_queue_alloc_security: + * Allocate and attach a security structure to the + * msq->q_perm.security field. The security field is initialized to + * NULL when the structure is first created. + * @msq contains the message queue structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_queue_free_security: + * Deallocate security structure for this message queue. + * @msq contains the message queue structure to be modified. + * @msg_queue_associate: + * Check permission when a message queue is requested through the + * msgget system call. This hook is only called when returning the + * message queue identifier for an existing message queue, not when a + * new message queue is created. + * @msq contains the message queue to act upon. + * @msqflg contains the operation control flags. + * Return 0 if permission is granted. + * @msg_queue_msgctl: + * Check permission when a message control operation specified by @cmd + * is to be performed on the message queue @msq. + * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. + * @msq contains the message queue to act upon. May be NULL. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @msg_queue_msgsnd: + * Check permission before a message, @msg, is enqueued on the message + * queue, @msq. + * @msq contains the message queue to send message to. + * @msg contains the message to be enqueued. + * @msqflg contains operational flags. + * Return 0 if permission is granted. + * @msg_queue_msgrcv: + * Check permission before a message, @msg, is removed from the message + * queue, @msq. The @target task structure contains a pointer to the + * process that will be receiving the message (not equal to the current + * process when inline receives are being performed). + * @msq contains the message queue to retrieve message from. + * @msg contains the message destination. + * @target contains the task structure for recipient process. + * @type contains the type of message requested. + * @mode contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Shared Memory Segments + * + * @shm_alloc_security: + * Allocate and attach a security structure to the shp->shm_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @shp contains the shared memory structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @shm_free_security: + * Deallocate the security struct for this memory segment. + * @shp contains the shared memory structure to be modified. + * @shm_associate: + * Check permission when a shared memory region is requested through the + * shmget system call. This hook is only called when returning the shared + * memory region identifier for an existing region, not when a new shared + * memory region is created. + * @shp contains the shared memory structure to be modified. + * @shmflg contains the operation control flags. + * Return 0 if permission is granted. + * @shm_shmctl: + * Check permission when a shared memory control operation specified by + * @cmd is to be performed on the shared memory region @shp. + * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. + * @shp contains shared memory structure to be modified. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @shm_shmat: + * Check permissions prior to allowing the shmat system call to attach the + * shared memory segment @shp to the data segment of the calling process. + * The attaching address is specified by @shmaddr. + * @shp contains the shared memory structure to be modified. + * @shmaddr contains the address to attach memory region to. + * @shmflg contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Semaphores + * + * @sem_alloc_security: + * Allocate and attach a security structure to the sma->sem_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @sma contains the semaphore structure + * Return 0 if operation was successful and permission is granted. + * @sem_free_security: + * deallocate security struct for this semaphore + * @sma contains the semaphore structure. + * @sem_associate: + * Check permission when a semaphore is requested through the semget + * system call. This hook is only called when returning the semaphore + * identifier for an existing semaphore, not when a new one must be + * created. + * @sma contains the semaphore structure. + * @semflg contains the operation control flags. + * Return 0 if permission is granted. + * @sem_semctl: + * Check permission when a semaphore operation specified by @cmd is to be + * performed on the semaphore @sma. The @sma may be NULL, e.g. for + * IPC_INFO or SEM_INFO. + * @sma contains the semaphore structure. May be NULL. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @sem_semop + * Check permissions before performing operations on members of the + * semaphore set @sma. If the @alter flag is nonzero, the semaphore set + * may be modified. + * @sma contains the semaphore structure. + * @sops contains the operations to perform. + * @nsops contains the number of operations to perform. + * @alter contains the flag indicating whether changes are to be made. + * Return 0 if permission is granted. + * + * @binder_set_context_mgr + * Check whether @mgr is allowed to be the binder context manager. + * @mgr contains the task_struct for the task being registered. + * Return 0 if permission is granted. + * @binder_transaction + * Check whether @from is allowed to invoke a binder transaction call + * to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_binder + * Check whether @from is allowed to transfer a binder reference to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_file + * Check whether @from is allowed to transfer @file to @to. + * @from contains the task_struct for the sending task. + * @file contains the struct file being transferred. + * @to contains the task_struct for the receiving task. + * + * @ptrace_access_check: + * Check permission before allowing the current process to trace the + * @child process. + * Security modules may also want to perform a process tracing check + * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of + * binprm_security_ops if the process is being traced and its security + * attributes would be changed by the execve. + * @child contains the task_struct structure for the target process. + * @mode contains the PTRACE_MODE flags indicating the form of access. + * Return 0 if permission is granted. + * @ptrace_traceme: + * Check that the @parent process has sufficient permission to trace the + * current process before allowing the current process to present itself + * to the @parent process for tracing. + * @parent contains the task_struct structure for debugger process. + * Return 0 if permission is granted. + * @capget: + * Get the @effective, @inheritable, and @permitted capability sets for + * the @target process. The hook may also perform permission checking to + * determine if the current process is allowed to see the capability sets + * of the @target process. + * @target contains the task_struct structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 if the capability sets were successfully obtained. + * @capset: + * Set the @effective, @inheritable, and @permitted capability sets for + * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. + * @capable: + * Check whether the @tsk process has the @cap capability in the indicated + * credentials. + * @cred contains the credentials to use. + * @ns contains the user namespace we want the capability in + * @cap contains the capability . + * @audit: Whether to write an audit message or not + * Return 0 if the capability is granted for @tsk. + * @syslog: + * Check permission before accessing the kernel message ring or changing + * logging to the console. + * See the syslog(2) manual page for an explanation of the @type values. + * @type contains the type of action. + * @from_file indicates the context of action (if it came from /proc). + * Return 0 if permission is granted. + * @settime: + * Check permission to change the system time. + * struct timespec and timezone are defined in include/linux/time.h + * @ts contains new time + * @tz contains new timezone + * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. + * @pages contains the number of pages. + * Return 0 if permission is granted. + * + * @ismaclabel: + * Check if the extended attribute specified by @name + * represents a MAC label. Returns 1 if name is a MAC + * attribute otherwise returns 0. + * @name full extended attribute name to check against + * LSM as a MAC label. + * + * @secid_to_secctx: + * Convert secid to security context. If secdata is NULL the length of + * the result will be returned in seclen, but no secdata will be returned. + * This does mean that the length could change between calls to check the + * length and the next call which actually allocates and returns the + * secdata. + * @secid contains the security ID. + * @secdata contains the pointer that stores the converted security + * context. + * @seclen pointer which contains the length of the data + * @secctx_to_secid: + * Convert security context to secid. + * @secid contains the pointer to the generated security ID. + * @secdata contains the security context. + * + * @release_secctx: + * Release the security context. + * @secdata contains the security context. + * @seclen contains the length of the security context. + * + * Security hooks for Audit + * + * @audit_rule_init: + * Allocate and initialize an LSM audit rule structure. + * @field contains the required Audit action. + * Fields flags are defined in include/linux/audit.h + * @op contains the operator the rule uses. + * @rulestr contains the context where the rule will be applied to. + * @lsmrule contains a pointer to receive the result. + * Return 0 if @lsmrule has been successfully set, + * -EINVAL in case of an invalid rule. + * + * @audit_rule_known: + * Specifies whether given @rule contains any fields related to + * current LSM. + * @rule contains the audit rule of interest. + * Return 1 in case of relation found, 0 otherwise. + * + * @audit_rule_match: + * Determine if given @secid matches a rule previously approved + * by @audit_rule_known. + * @secid contains the security id in question. + * @field contains the field which relates to current LSM. + * @op contains the operator that will be used for matching. + * @rule points to the audit rule that will be checked against. + * @actx points to the audit context associated with the check. + * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. + * + * @audit_rule_free: + * Deallocate the LSM audit rule structure previously allocated by + * audit_rule_init. + * @rule contains the allocated rule + * + * @inode_notifysecctx: + * Notify the security module of what the security context of an inode + * should be. Initializes the incore security context managed by the + * security module for this inode. Example usage: NFS client invokes + * this hook to initialize the security context in its incore inode to the + * value provided by the server for the file when the server returned the + * file's attributes to the client. + * + * Must be called with inode->i_mutex locked. + * + * @inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_setsecctx: + * Change the security context of an inode. Updates the + * incore security context managed by the security module and invokes the + * fs code as needed (via __vfs_setxattr_noperm) to update any backing + * xattrs that represent the context. Example usage: NFS server invokes + * this hook to change the security context in its incore inode and on the + * backing filesystem to a value provided by the client on a SETATTR + * operation. + * + * Must be called with inode->i_mutex locked. + * + * @dentry contains the inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_getsecctx: + * On success, returns 0 and fills out @ctx and @ctxlen with the security + * context for the given @inode. + * + * @inode we wish to get the security context of. + * @ctx is a pointer in which to place the allocated security context. + * @ctxlen points to the place to put the length of @ctx. + * This is the main security structure. + */ + struct security_operations { char name[SECURITY_NAME_MAX + 1]; -- cgit v1.2.3 From 346033a28fb16b83dac2a74d8025ff8ee64a2c9b Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 2 May 2015 15:11:14 -0700 Subject: LSM: Remove a comment from security.h Remove the large comment describing the content of the security_operations structure from security.h. This wasn't done in the previous (2/7) patch because it would have exceeded the mail list size limits. Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Kees Cook Acked-by: Paul Moore Acked-by: Stephen Smalley Acked-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/security.h | 1270 ---------------------------------------------- 1 file changed, 1270 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index f3d42c636f27..a2a100e7ac6e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -186,1276 +186,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/** - * struct security_operations - main security structure - * - * Security module identifier. - * - * @name: - * A string that acts as a unique identifier for the LSM with max number - * of characters = SECURITY_NAME_MAX. - * - * Security hooks for program execution operations. - * - * @bprm_set_creds: - * Save security information in the bprm->security field, typically based - * on information about the bprm->file, for later use by the apply_creds - * hook. This hook may also optionally check permissions (e.g. for - * transitions between security domains). - * This hook may be called multiple times during a single execve, e.g. for - * interpreters. The hook can tell whether it has already been called by - * checking to see if @bprm->security is non-NULL. If so, then the hook - * may decide either to retain the security information saved earlier or - * to replace it. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in the - * preceding set_creds call. The primary difference from set_creds is - * that the argv list and envp list are reliably available in @bprm. This - * hook may be called multiple times during a single execve; and in each - * pass set_creds is called first. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_committing_creds: - * Prepare to install the new security attributes of a process being - * transformed by an execve operation, based on the old credentials - * pointed to by @current->cred and the information set in @bprm->cred by - * the bprm_set_creds hook. @bprm points to the linux_binprm structure. - * This hook is a good place to perform state changes on the process such - * as closing open file descriptors to which access will no longer be - * granted when the attributes are changed. This is called immediately - * before commit_creds(). - * @bprm_committed_creds: - * Tidy up after the installation of the new security attributes of a - * process being transformed by an execve operation. The new credentials - * have, by this point, been set to @current->cred. @bprm points to the - * linux_binprm structure. This hook is a good place to perform state - * changes on the process such as clearing out non-inheritable signal - * state. This is called immediately after commit_creds(). - * @bprm_secureexec: - * Return a boolean value (0 or 1) indicating whether a "secure exec" - * is required. The flag is passed in the auxiliary table - * on the initial stack to the ELF interpreter to indicate whether libc - * should enable secure mode. - * @bprm contains the linux_binprm structure. - * - * Security hooks for filesystem operations. - * - * @sb_alloc_security: - * Allocate and attach a security structure to the sb->s_security field. - * The s_security field is initialized to NULL when the structure is - * allocated. - * @sb contains the super_block structure to be modified. - * Return 0 if operation was successful. - * @sb_free_security: - * Deallocate and clear the sb->s_security field. - * @sb contains the super_block structure to be modified. - * @sb_statfs: - * Check permission before obtaining filesystem statistics for the @mnt - * mountpoint. - * @dentry is a handle on the superblock for the filesystem. - * Return 0 if permission is granted. - * @sb_mount: - * Check permission before an object specified by @dev_name is mounted on - * the mount point named by @nd. For an ordinary mount, @dev_name - * identifies a device if the file system type requires a device. For a - * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a - * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the - * pathname of the object being mounted. - * @dev_name contains the name for object being mounted. - * @path contains the path for mount point object. - * @type contains the filesystem type. - * @flags contains the mount flags. - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_copy_data: - * Allow mount option data to be copied prior to parsing by the filesystem, - * so that the security module can extract security-specific mount - * options cleanly (a filesystem may modify the data e.g. with strsep()). - * This also allows the original mount data to be stripped of security- - * specific options to avoid having to make filesystems aware of them. - * @type the type of filesystem being mounted. - * @orig the original mount data copied from userspace. - * @copy copied data which will be passed to the security module. - * Returns 0 if the copy was successful. - * @sb_remount: - * Extracts security system specific mount options and verifies no changes - * are being made to those options. - * @sb superblock being remounted - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_umount: - * Check permission before the @mnt file system is unmounted. - * @mnt contains the mounted file system. - * @flags contains the unmount flags, e.g. MNT_FORCE. - * Return 0 if permission is granted. - * @sb_pivotroot: - * Check permission before pivoting the root filesystem. - * @old_path contains the path for the new location of the current root (put_old). - * @new_path contains the path for the new root (new_root). - * Return 0 if permission is granted. - * @sb_set_mnt_opts: - * Set the security relevant mount options used for a superblock - * @sb the superblock to set security mount options for - * @opts binary data structure containing all lsm mount data - * @sb_clone_mnt_opts: - * Copy all security options from a given superblock to another - * @oldsb old superblock which contain information to clone - * @newsb new superblock which needs filled in - * @sb_parse_opts_str: - * Parse a string of security data filling in the opts structure - * @options string containing all mount options known by the LSM - * @opts binary data structure usable by the LSM - * @dentry_init_security: - * Compute a context for a dentry as the inode is not yet available - * since NFSv4 has no label backed by an EA anyway. - * @dentry dentry to use in calculating the context. - * @mode mode used to determine resource type. - * @name name of the last path component used to create file - * @ctx pointer to place the pointer to the resulting context in. - * @ctxlen point to place the length of the resulting context. - * - * - * Security hooks for inode operations. - * - * @inode_alloc_security: - * Allocate and attach a security structure to @inode->i_security. The - * i_security field is initialized to NULL when the inode structure is - * allocated. - * @inode contains the inode structure. - * Return 0 if operation was successful. - * @inode_free_security: - * @inode contains the inode structure. - * Deallocate the inode security structure and set @inode->i_security to - * NULL. - * @inode_init_security: - * Obtain the security attribute name suffix and value to set on a newly - * created inode and set up the incore security field for the new inode. - * This hook is called by the fs code as part of the inode creation - * transaction and provides for atomic labeling of the inode, unlike - * the post_create/mkdir/... hooks called by the VFS. The hook function - * is expected to allocate the name and value via kmalloc, with the caller - * being responsible for calling kfree after using them. - * If the security module does not use security attributes or does - * not wish to put a security attribute on this particular inode, - * then it should return -EOPNOTSUPP to skip this processing. - * @inode contains the inode structure of the newly created inode. - * @dir contains the inode structure of the parent directory. - * @qstr contains the last path component of the new object - * @name will be set to the allocated name suffix (e.g. selinux). - * @value will be set to the allocated attribute value. - * @len will be set to the length of the value. - * Returns 0 if @name and @value have been successfully set, - * -EOPNOTSUPP if no security attribute is needed, or - * -ENOMEM on memory allocation failure. - * @inode_create: - * Check permission to create a regular file. - * @dir contains inode structure of the parent of the new file. - * @dentry contains the dentry structure for the file to be created. - * @mode contains the file mode of the file to be created. - * Return 0 if permission is granted. - * @inode_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link to the file. - * @dir contains the inode structure of the parent directory of the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @path_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link - * to the file. - * @new_dir contains the path structure of the parent directory of - * the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @inode_unlink: - * Check the permission to remove a hard link to a file. - * @dir contains the inode structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @path_unlink: - * Check the permission to remove a hard link to a file. - * @dir contains the path structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @inode_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the inode structure of parent directory of the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @path_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the path structure of parent directory of - * the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @inode_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with inode structure @dir. - * @dir contains the inode structure of parent of the directory to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @path_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with path structure @path. - * @dir contains the path structure of parent of the directory - * to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @inode_rmdir: - * Check the permission to remove a directory. - * @dir contains the inode structure of parent of the directory to be removed. - * @dentry contains the dentry structure of directory to be removed. - * Return 0 if permission is granted. - * @path_rmdir: - * Check the permission to remove a directory. - * @dir contains the path structure of parent of the directory to be - * removed. - * @dentry contains the dentry structure of directory to be removed. - * Return 0 if permission is granted. - * @inode_mknod: - * Check permissions when creating a special file (or a socket or a fifo - * file created via the mknod system call). Note that if mknod operation - * is being done for a regular file, then the create hook will be called - * and not this hook. - * @dir contains the inode structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the device number. - * Return 0 if permission is granted. - * @path_mknod: - * Check permissions when creating a file. Note that this hook is called - * even if mknod operation is being done for a regular file. - * @dir contains the path structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the undecoded device number. Use new_decode_dev() to get - * the decoded device number. - * Return 0 if permission is granted. - * @inode_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the inode structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the inode structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. - * Return 0 if permission is granted. - * @path_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the path structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the path structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. - * Return 0 if permission is granted. - * @path_chmod: - * Check for permission to change DAC's permission of a file or directory. - * @dentry contains the dentry structure. - * @mnt contains the vfsmnt structure. - * @mode contains DAC's mode. - * Return 0 if permission is granted. - * @path_chown: - * Check for permission to change owner/group of a file or directory. - * @path contains the path structure. - * @uid contains new owner's ID. - * @gid contains new group's ID. - * Return 0 if permission is granted. - * @path_chroot: - * Check for permission to change root directory. - * @path contains the path structure. - * Return 0 if permission is granted. - * @inode_readlink: - * Check the permission to read the symbolic link. - * @dentry contains the dentry structure for the file link. - * Return 0 if permission is granted. - * @inode_follow_link: - * Check permission to follow a symbolic link when looking up a pathname. - * @dentry contains the dentry structure for the link. - * @nd contains the nameidata structure for the parent directory. - * Return 0 if permission is granted. - * @inode_permission: - * Check permission before accessing an inode. This hook is called by the - * existing Linux permission function, so a security module can use it to - * provide additional checking for existing Linux permission checks. - * Notice that this hook is called when a file is opened (as well as many - * other operations), whereas the file_security_ops permission hook is - * called when the actual read/write operations are performed. - * @inode contains the inode structure to check. - * @mask contains the permission mask. - * Return 0 if permission is granted. - * @inode_setattr: - * Check permission before setting file attributes. Note that the kernel - * call to notify_change is performed from several locations, whenever - * file attributes change (such as when a file is truncated, chown/chmod - * operations, transferring disk quotas, etc). - * @dentry contains the dentry structure for the file. - * @attr is the iattr structure containing the new file attributes. - * Return 0 if permission is granted. - * @path_truncate: - * Check permission before truncating a file. - * @path contains the path structure for the file. - * Return 0 if permission is granted. - * @inode_getattr: - * Check permission before obtaining file attributes. - * @mnt is the vfsmount where the dentry was looked up - * @dentry contains the dentry structure for the file. - * Return 0 if permission is granted. - * @inode_setxattr: - * Check permission before setting the extended attributes - * @value identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_post_setxattr: - * Update inode security field after successful setxattr operation. - * @value identified by @name for @dentry. - * @inode_getxattr: - * Check permission before obtaining the extended attributes - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_listxattr: - * Check permission before obtaining the list of extended attribute - * names for @dentry. - * Return 0 if permission is granted. - * @inode_removexattr: - * Check permission before removing the extended attribute - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_getsecurity: - * Retrieve a copy of the extended attribute representation of the - * security label associated with @name for @inode via @buffer. Note that - * @name is the remainder of the attribute name after the security prefix - * has been removed. @alloc is used to specify of the call should return a - * value via the buffer or just the value length Return size of buffer on - * success. - * @inode_setsecurity: - * Set the security label associated with @name for @inode from the - * extended attribute value @value. @size indicates the size of the - * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. - * Note that @name is the remainder of the attribute name after the - * security. prefix has been removed. - * Return 0 on success. - * @inode_listsecurity: - * Copy the extended attribute names for the security labels - * associated with @inode into @buffer. The maximum size of @buffer - * is specified by @buffer_size. @buffer may be NULL to request - * the size of the buffer required. - * Returns number of bytes used/required on success. - * @inode_need_killpriv: - * Called when an inode has been changed. - * @dentry is the dentry being changed. - * Return <0 on error to abort the inode change operation. - * Return 0 if inode_killpriv does not need to be called. - * Return >0 if inode_killpriv does need to be called. - * @inode_killpriv: - * The setuid bit is being removed. Remove similar security labels. - * Called with the dentry->d_inode->i_mutex held. - * @dentry is the dentry being changed. - * Return 0 on success. If error is returned, then the operation - * causing setuid bit removal is failed. - * @inode_getsecid: - * Get the secid associated with the node. - * @inode contains a pointer to the inode. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. - * - * Security hooks for file operations - * - * @file_permission: - * Check file permissions before accessing an open file. This hook is - * called by various operations that read or write files. A security - * module can use this hook to perform additional checking on these - * operations, e.g. to revalidate permissions on use to support privilege - * bracketing or policy changes. Notice that this hook is used when the - * actual read/write operations are performed, whereas the - * inode_security_ops hook is called when a file is opened (as well as - * many other operations). - * Caveat: Although this hook can be used to revalidate permissions for - * various system call operations that read or write files, it does not - * address the revalidation of permissions for memory-mapped files. - * Security modules must handle this separately if they need such - * revalidation. - * @file contains the file structure being accessed. - * @mask contains the requested permissions. - * Return 0 if permission is granted. - * @file_alloc_security: - * Allocate and attach a security structure to the file->f_security field. - * The security field is initialized to NULL when the structure is first - * created. - * @file contains the file structure to secure. - * Return 0 if the hook is successful and permission is granted. - * @file_free_security: - * Deallocate and free any security structures stored in file->f_security. - * @file contains the file structure being modified. - * @file_ioctl: - * @file contains the file structure. - * @cmd contains the operation to perform. - * @arg contains the operational arguments. - * Check permission for an ioctl operation on @file. Note that @arg - * sometimes represents a user space pointer; in other cases, it may be a - * simple integer value. When @arg represents a user space pointer, it - * should never be used by the security module. - * Return 0 if permission is granted. - * @mmap_addr : - * Check permissions for a mmap operation at @addr. - * @addr contains virtual address that will be used for the operation. - * Return 0 if permission is granted. - * @mmap_file : - * Check permissions for a mmap operation. The @file may be NULL, e.g. - * if mapping anonymous memory. - * @file contains the file structure for file to map (may be NULL). - * @reqprot contains the protection requested by the application. - * @prot contains the protection that will be applied by the kernel. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @file_mprotect: - * Check permissions before changing memory access permissions. - * @vma contains the memory region to modify. - * @reqprot contains the protection requested by the application. - * @prot contains the protection that will be applied by the kernel. - * Return 0 if permission is granted. - * @file_lock: - * Check permission before performing file locking operations. - * Note: this hook mediates both flock and fcntl style locks. - * @file contains the file structure. - * @cmd contains the posix-translated lock operation to perform - * (e.g. F_RDLCK, F_WRLCK). - * Return 0 if permission is granted. - * @file_fcntl: - * Check permission before allowing the file operation specified by @cmd - * from being performed on the file @file. Note that @arg sometimes - * represents a user space pointer; in other cases, it may be a simple - * integer value. When @arg represents a user space pointer, it should - * never be used by the security module. - * @file contains the file structure. - * @cmd contains the operation to be performed. - * @arg contains the operational arguments. - * Return 0 if permission is granted. - * @file_set_fowner: - * Save owner security information (typically from current->security) in - * file->f_security for later use by the send_sigiotask hook. - * @file contains the file structure to update. - * Return 0 on success. - * @file_send_sigiotask: - * Check permission for the file owner @fown to send SIGIO or SIGURG to the - * process @tsk. Note that this hook is sometimes called from interrupt. - * Note that the fown_struct, @fown, is never outside the context of a - * struct file, so the file structure (and associated security information) - * can always be obtained: - * container_of(fown, struct file, f_owner) - * @tsk contains the structure of task receiving signal. - * @fown contains the file owner information. - * @sig is the signal that will be sent. When 0, kernel sends SIGIO. - * Return 0 if permission is granted. - * @file_receive: - * This hook allows security modules to control the ability of a process - * to receive an open file descriptor via socket IPC. - * @file contains the file structure being received. - * Return 0 if permission is granted. - * @file_open - * Save open-time permission checking state for later use upon - * file_permission, and recheck access if anything has changed - * since inode_permission. - * - * Security hooks for task operations. - * - * @task_create: - * Check permission before creating a child process. See the clone(2) - * manual page for definitions of the @clone_flags. - * @clone_flags contains the flags indicating what should be shared. - * Return 0 if permission is granted. - * @task_free: - * @task task being freed - * Handle release of task-related resources. (Note that this can be called - * from interrupt context.) - * @cred_alloc_blank: - * @cred points to the credentials. - * @gfp indicates the atomicity of any memory allocations. - * Only allocate sufficient memory and attach to @cred such that - * cred_transfer() will not get ENOMEM. - * @cred_free: - * @cred points to the credentials. - * Deallocate and clear the cred->security field in a set of credentials. - * @cred_prepare: - * @new points to the new credentials. - * @old points to the original credentials. - * @gfp indicates the atomicity of any memory allocations. - * Prepare a new set of credentials by copying the data from the old set. - * @cred_transfer: - * @new points to the new credentials. - * @old points to the original credentials. - * Transfer data from original creds to new creds - * @kernel_act_as: - * Set the credentials for a kernel service to act as (subjective context). - * @new points to the credentials to be modified. - * @secid specifies the security ID to be set - * The current task must be the one that nominated @secid. - * Return 0 if successful. - * @kernel_create_files_as: - * Set the file creation context in a set of credentials to be the same as - * the objective context of the specified inode. - * @new points to the credentials to be modified. - * @inode points to the inode to use as a reference. - * The current task must be the one that nominated @inode. - * Return 0 if successful. - * @kernel_fw_from_file: - * Load firmware from userspace (not called for built-in firmware). - * @file contains the file structure pointing to the file containing - * the firmware to load. This argument will be NULL if the firmware - * was loaded via the uevent-triggered blob-based interface exposed - * by CONFIG_FW_LOADER_USER_HELPER. - * @buf pointer to buffer containing firmware contents. - * @size length of the firmware contents. - * Return 0 if permission is granted. - * @kernel_module_request: - * Ability to trigger the kernel to automatically upcall to userspace for - * userspace to load a kernel module with the given name. - * @kmod_name name of the module requested by the kernel - * Return 0 if successful. - * @kernel_module_from_file: - * Load a kernel module from userspace. - * @file contains the file structure pointing to the file containing - * the kernel module to load. If the module is being loaded from a blob, - * this argument will be NULL. - * Return 0 if permission is granted. - * @task_fix_setuid: - * Update the module's state after setting one or more of the user - * identity attributes of the current process. The @flags parameter - * indicates which of the set*uid system calls invoked this hook. If - * @new is the set of credentials that will be installed. Modifications - * should be made to this rather than to @current->cred. - * @old is the set of credentials that are being replaces - * @flags contains one of the LSM_SETID_* values. - * Return 0 on success. - * @task_setpgid: - * Check permission before setting the process group identifier of the - * process @p to @pgid. - * @p contains the task_struct for process being modified. - * @pgid contains the new pgid. - * Return 0 if permission is granted. - * @task_getpgid: - * Check permission before getting the process group identifier of the - * process @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsid: - * Check permission before getting the session identifier of the process - * @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsecid: - * Retrieve the security identifier of the process @p. - * @p contains the task_struct for the process and place is into @secid. - * In case of failure, @secid will be set to zero. - * - * @task_setnice: - * Check permission before setting the nice value of @p to @nice. - * @p contains the task_struct of process. - * @nice contains the new nice value. - * Return 0 if permission is granted. - * @task_setioprio - * Check permission before setting the ioprio value of @p to @ioprio. - * @p contains the task_struct of process. - * @ioprio contains the new ioprio value - * Return 0 if permission is granted. - * @task_getioprio - * Check permission before getting the ioprio value of @p. - * @p contains the task_struct of process. - * Return 0 if permission is granted. - * @task_setrlimit: - * Check permission before setting the resource limits of the current - * process for @resource to @new_rlim. The old resource limit values can - * be examined by dereferencing (current->signal->rlim + resource). - * @resource contains the resource whose limit is being set. - * @new_rlim contains the new limits for @resource. - * Return 0 if permission is granted. - * @task_setscheduler: - * Check permission before setting scheduling policy and/or parameters of - * process @p based on @policy and @lp. - * @p contains the task_struct for process. - * @policy contains the scheduling policy. - * @lp contains the scheduling parameters. - * Return 0 if permission is granted. - * @task_getscheduler: - * Check permission before obtaining scheduling information for process - * @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_movememory - * Check permission before moving memory owned by process @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_kill: - * Check permission before sending signal @sig to @p. @info can be NULL, - * the constant 1, or a pointer to a siginfo structure. If @info is 1 or - * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming - * from the kernel and should typically be permitted. - * SIGIO signals are handled separately by the send_sigiotask hook in - * file_security_ops. - * @p contains the task_struct for process. - * @info contains the signal information. - * @sig contains the signal value. - * @secid contains the sid of the process where the signal originated - * Return 0 if permission is granted. - * @task_wait: - * Check permission before allowing a process to reap a child process @p - * and collect its status information. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_prctl: - * Check permission before performing a process control operation on the - * current process. - * @option contains the operation. - * @arg2 contains a argument. - * @arg3 contains a argument. - * @arg4 contains a argument. - * @arg5 contains a argument. - * Return -ENOSYS if no-one wanted to handle this op, any other value to - * cause prctl() to return immediately with that value. - * @task_to_inode: - * Set the security attributes for an inode based on an associated task's - * security attributes, e.g. for /proc/pid inodes. - * @p contains the task_struct for the task. - * @inode contains the inode structure for the inode. - * - * Security hooks for Netlink messaging. - * - * @netlink_send: - * Save security information for a netlink message so that permission - * checking can be performed when the message is processed. The security - * information can be saved using the eff_cap field of the - * netlink_skb_parms structure. Also may be used to provide fine - * grained control over message transmission. - * @sk associated sock of task sending the message. - * @skb contains the sk_buff structure for the netlink message. - * Return 0 if the information was successfully saved and message - * is allowed to be transmitted. - * - * Security hooks for Unix domain networking. - * - * @unix_stream_connect: - * Check permissions before establishing a Unix domain stream connection - * between @sock and @other. - * @sock contains the sock structure. - * @other contains the peer sock structure. - * @newsk contains the new sock structure. - * Return 0 if permission is granted. - * @unix_may_send: - * Check permissions before connecting or sending datagrams from @sock to - * @other. - * @sock contains the socket structure. - * @other contains the peer socket structure. - * Return 0 if permission is granted. - * - * The @unix_stream_connect and @unix_may_send hooks were necessary because - * Linux provides an alternative to the conventional file name space for Unix - * domain sockets. Whereas binding and connecting to sockets in the file name - * space is mediated by the typical file permissions (and caught by the mknod - * and permission hooks in inode_security_ops), binding and connecting to - * sockets in the abstract name space is completely unmediated. Sufficient - * control of Unix domain sockets in the abstract name space isn't possible - * using only the socket layer hooks, since we need to know the actual target - * socket, which is not looked up until we are inside the af_unix code. - * - * Security hooks for socket operations. - * - * @socket_create: - * Check permissions prior to creating a new socket. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * Return 0 if permission is granted. - * @socket_post_create: - * This hook allows a module to update or allocate a per-socket security - * structure. Note that the security field was not added directly to the - * socket structure, but rather, the socket security information is stored - * in the associated inode. Typically, the inode alloc_security hook will - * allocate and and attach security information to - * sock->inode->i_security. This hook may be used to update the - * sock->inode->i_security field with additional information that wasn't - * available when the inode was allocated. - * @sock contains the newly created socket structure. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * @socket_bind: - * Check permission before socket protocol layer bind operation is - * performed and the socket @sock is bound to the address specified in the - * @address parameter. - * @sock contains the socket structure. - * @address contains the address to bind to. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_connect: - * Check permission before socket protocol layer connect operation - * attempts to connect socket @sock to a remote address, @address. - * @sock contains the socket structure. - * @address contains the address of remote endpoint. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_listen: - * Check permission before socket protocol layer listen operation. - * @sock contains the socket structure. - * @backlog contains the maximum length for the pending connection queue. - * Return 0 if permission is granted. - * @socket_accept: - * Check permission before accepting a new connection. Note that the new - * socket, @newsock, has been created and some information copied to it, - * but the accept operation has not actually been performed. - * @sock contains the listening socket structure. - * @newsock contains the newly created server socket for connection. - * Return 0 if permission is granted. - * @socket_sendmsg: - * Check permission before transmitting a message to another socket. - * @sock contains the socket structure. - * @msg contains the message to be transmitted. - * @size contains the size of message. - * Return 0 if permission is granted. - * @socket_recvmsg: - * Check permission before receiving a message from a socket. - * @sock contains the socket structure. - * @msg contains the message structure. - * @size contains the size of message structure. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @socket_getsockname: - * Check permission before the local address (name) of the socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getpeername: - * Check permission before the remote address (name) of a socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getsockopt: - * Check permissions before retrieving the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to retrieve option from. - * @optname contains the name of option to retrieve. - * Return 0 if permission is granted. - * @socket_setsockopt: - * Check permissions before setting the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to set options for. - * @optname contains the name of the option to set. - * Return 0 if permission is granted. - * @socket_shutdown: - * Checks permission before all or part of a connection on the socket - * @sock is shut down. - * @sock contains the socket structure. - * @how contains the flag indicating how future sends and receives are handled. - * Return 0 if permission is granted. - * @socket_sock_rcv_skb: - * Check permissions on incoming network packets. This hook is distinct - * from Netfilter's IP input hooks since it is the first time that the - * incoming sk_buff @skb has been associated with a particular socket, @sk. - * Must not sleep inside this hook because some callers hold spinlocks. - * @sk contains the sock (not socket) associated with the incoming sk_buff. - * @skb contains the incoming network data. - * @socket_getpeersec_stream: - * This hook allows the security module to provide peer socket security - * state for unix or connected tcp sockets to userspace via getsockopt - * SO_GETPEERSEC. For tcp sockets this can be meaningful if the - * socket is associated with an ipsec SA. - * @sock is the local socket. - * @optval userspace memory where the security state is to be copied. - * @optlen userspace int where the module should copy the actual length - * of the security state. - * @len as input is the maximum length to copy to userspace provided - * by the caller. - * Return 0 if all is well, otherwise, typical getsockopt return - * values. - * @socket_getpeersec_dgram: - * This hook allows the security module to provide peer socket security - * state for udp sockets on a per-packet basis to userspace via - * getsockopt SO_GETPEERSEC. The application must first have indicated - * the IP_PASSSEC option via getsockopt. It can then retrieve the - * security state returned by this hook for a packet via the SCM_SECURITY - * ancillary message type. - * @skb is the skbuff for the packet being queried - * @secdata is a pointer to a buffer in which to copy the security data - * @seclen is the maximum length for @secdata - * Return 0 on success, error on failure. - * @sk_alloc_security: - * Allocate and attach a security structure to the sk->sk_security field, - * which is used to copy security attributes between local stream sockets. - * @sk_free_security: - * Deallocate security structure. - * @sk_clone_security: - * Clone/copy security structure. - * @sk_getsecid: - * Retrieve the LSM-specific secid for the sock to enable caching of network - * authorizations. - * @sock_graft: - * Sets the socket's isec sid to the sock's sid. - * @inet_conn_request: - * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. - * @inet_csk_clone: - * Sets the new child socket's sid to the openreq sid. - * @inet_conn_established: - * Sets the connection's peersid to the secmark on skb. - * @secmark_relabel_packet: - * check if the process should be allowed to relabel packets to the given secid - * @security_secmark_refcount_inc - * tells the LSM to increment the number of secmark labeling rules loaded - * @security_secmark_refcount_dec - * tells the LSM to decrement the number of secmark labeling rules loaded - * @req_classify_flow: - * Sets the flow's sid to the openreq sid. - * @tun_dev_alloc_security: - * This hook allows a module to allocate a security structure for a TUN - * device. - * @security pointer to a security structure pointer. - * Returns a zero on success, negative values on failure. - * @tun_dev_free_security: - * This hook allows a module to free the security structure for a TUN - * device. - * @security pointer to the TUN device's security structure - * @tun_dev_create: - * Check permissions prior to creating a new TUN device. - * @tun_dev_attach_queue: - * Check permissions prior to attaching to a TUN device queue. - * @security pointer to the TUN device's security structure. - * @tun_dev_attach: - * This hook can be used by the module to update any security state - * associated with the TUN device's sock structure. - * @sk contains the existing sock structure. - * @security pointer to the TUN device's security structure. - * @tun_dev_open: - * This hook can be used by the module to update any security state - * associated with the TUN device's security structure. - * @security pointer to the TUN devices's security structure. - * @skb_owned_by: - * This hook sets the packet's owning sock. - * @skb is the packet. - * @sk the sock which owns the packet. - * - * Security hooks for XFRM operations. - * - * @xfrm_policy_alloc_security: - * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy - * Database used by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. - * Return 0 if operation was successful (memory to allocate, legal context) - * @gfp is to specify the context for the allocation - * @xfrm_policy_clone_security: - * @old_ctx contains an existing xfrm_sec_ctx. - * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. - * Allocate a security structure in new_ctxp that contains the - * information from the old_ctx structure. - * Return 0 if operation was successful (memory to allocate). - * @xfrm_policy_free_security: - * @ctx contains the xfrm_sec_ctx - * Deallocate xp->security. - * @xfrm_policy_delete_security: - * @ctx contains the xfrm_sec_ctx. - * Authorize deletion of xp->security. - * @xfrm_state_alloc: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to sec_ctx. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_alloc_acquire: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @polsec contains the policy's security context. - * @secid contains the secid from which to take the mls portion of the - * context. - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to secid. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_free_security: - * @x contains the xfrm_state. - * Deallocate x->security. - * @xfrm_state_delete_security: - * @x contains the xfrm_state. - * Authorize deletion of x->security. - * @xfrm_policy_lookup: - * @ctx contains the xfrm_sec_ctx for which the access control is being - * checked. - * @fl_secid contains the flow security label that is used to authorize - * access to the policy xp. - * @dir contains the direction of the flow (input or output). - * Check permission when a flow selects a xfrm_policy for processing - * XFRMs on a packet. The hook is called when selecting either a - * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted, -ESRCH otherwise, or -errno - * on other errors. - * @xfrm_state_pol_flow_match: - * @x contains the state to match. - * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. - * Return 1 if there is a match. - * @xfrm_decode_session: - * @skb points to skb to decode. - * @secid points to the flow key secid to set. - * @ckall says if all xfrms used should be checked for same secid. - * Return 0 if ckall is zero or all xfrms used have the same secid. - * - * Security hooks affecting all Key Management operations - * - * @key_alloc: - * Permit allocation of a key and assign security data. Note that key does - * not have a serial number assigned at this point. - * @key points to the key. - * @flags is the allocation flags - * Return 0 if permission is granted, -ve error otherwise. - * @key_free: - * Notification of destruction; free security data. - * @key points to the key. - * No return value. - * @key_permission: - * See whether a specific operational right is granted to a process on a - * key. - * @key_ref refers to the key (key pointer + possession attribute bit). - * @cred points to the credentials to provide the context against which to - * evaluate the security data on the key. - * @perm describes the combination of permissions required of this key. - * Return 0 if permission is granted, -ve error otherwise. - * @key_getsecurity: - * Get a textual representation of the security context attached to a key - * for the purposes of honouring KEYCTL_GETSECURITY. This function - * allocates the storage for the NUL-terminated string and the caller - * should free it. - * @key points to the key to be queried. - * @_buffer points to a pointer that should be set to point to the - * resulting string (if no label or an error occurs). - * Return the length of the string (including terminating NUL) or -ve if - * an error. - * May also return 0 (and a NULL buffer pointer) if there is no label. - * - * Security hooks affecting all System V IPC operations. - * - * @ipc_permission: - * Check permissions for access to IPC - * @ipcp contains the kernel IPC permission structure - * @flag contains the desired (requested) permission set - * Return 0 if permission is granted. - * @ipc_getsecid: - * Get the secid associated with the ipc object. - * @ipcp contains the kernel IPC permission structure. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. - * - * Security hooks for individual messages held in System V IPC message queues - * @msg_msg_alloc_security: - * Allocate and attach a security structure to the msg->security field. - * The security field is initialized to NULL when the structure is first - * created. - * @msg contains the message structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_msg_free_security: - * Deallocate the security structure for this message. - * @msg contains the message structure to be modified. - * - * Security hooks for System V IPC Message Queues - * - * @msg_queue_alloc_security: - * Allocate and attach a security structure to the - * msq->q_perm.security field. The security field is initialized to - * NULL when the structure is first created. - * @msq contains the message queue structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_queue_free_security: - * Deallocate security structure for this message queue. - * @msq contains the message queue structure to be modified. - * @msg_queue_associate: - * Check permission when a message queue is requested through the - * msgget system call. This hook is only called when returning the - * message queue identifier for an existing message queue, not when a - * new message queue is created. - * @msq contains the message queue to act upon. - * @msqflg contains the operation control flags. - * Return 0 if permission is granted. - * @msg_queue_msgctl: - * Check permission when a message control operation specified by @cmd - * is to be performed on the message queue @msq. - * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. - * @msq contains the message queue to act upon. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @msg_queue_msgsnd: - * Check permission before a message, @msg, is enqueued on the message - * queue, @msq. - * @msq contains the message queue to send message to. - * @msg contains the message to be enqueued. - * @msqflg contains operational flags. - * Return 0 if permission is granted. - * @msg_queue_msgrcv: - * Check permission before a message, @msg, is removed from the message - * queue, @msq. The @target task structure contains a pointer to the - * process that will be receiving the message (not equal to the current - * process when inline receives are being performed). - * @msq contains the message queue to retrieve message from. - * @msg contains the message destination. - * @target contains the task structure for recipient process. - * @type contains the type of message requested. - * @mode contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Shared Memory Segments - * - * @shm_alloc_security: - * Allocate and attach a security structure to the shp->shm_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @shp contains the shared memory structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @shm_free_security: - * Deallocate the security struct for this memory segment. - * @shp contains the shared memory structure to be modified. - * @shm_associate: - * Check permission when a shared memory region is requested through the - * shmget system call. This hook is only called when returning the shared - * memory region identifier for an existing region, not when a new shared - * memory region is created. - * @shp contains the shared memory structure to be modified. - * @shmflg contains the operation control flags. - * Return 0 if permission is granted. - * @shm_shmctl: - * Check permission when a shared memory control operation specified by - * @cmd is to be performed on the shared memory region @shp. - * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. - * @shp contains shared memory structure to be modified. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @shm_shmat: - * Check permissions prior to allowing the shmat system call to attach the - * shared memory segment @shp to the data segment of the calling process. - * The attaching address is specified by @shmaddr. - * @shp contains the shared memory structure to be modified. - * @shmaddr contains the address to attach memory region to. - * @shmflg contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Semaphores - * - * @sem_alloc_security: - * Allocate and attach a security structure to the sma->sem_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @sma contains the semaphore structure - * Return 0 if operation was successful and permission is granted. - * @sem_free_security: - * deallocate security struct for this semaphore - * @sma contains the semaphore structure. - * @sem_associate: - * Check permission when a semaphore is requested through the semget - * system call. This hook is only called when returning the semaphore - * identifier for an existing semaphore, not when a new one must be - * created. - * @sma contains the semaphore structure. - * @semflg contains the operation control flags. - * Return 0 if permission is granted. - * @sem_semctl: - * Check permission when a semaphore operation specified by @cmd is to be - * performed on the semaphore @sma. The @sma may be NULL, e.g. for - * IPC_INFO or SEM_INFO. - * @sma contains the semaphore structure. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @sem_semop - * Check permissions before performing operations on members of the - * semaphore set @sma. If the @alter flag is nonzero, the semaphore set - * may be modified. - * @sma contains the semaphore structure. - * @sops contains the operations to perform. - * @nsops contains the number of operations to perform. - * @alter contains the flag indicating whether changes are to be made. - * Return 0 if permission is granted. - * - * @binder_set_context_mgr - * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. - * Return 0 if permission is granted. - * @binder_transaction - * Check whether @from is allowed to invoke a binder transaction call - * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_binder - * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_file - * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. - * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. - * - * @ptrace_access_check: - * Check permission before allowing the current process to trace the - * @child process. - * Security modules may also want to perform a process tracing check - * during an execve in the set_security or apply_creds hooks of - * tracing check during an execve in the bprm_set_creds hook of - * binprm_security_ops if the process is being traced and its security - * attributes would be changed by the execve. - * @child contains the task_struct structure for the target process. - * @mode contains the PTRACE_MODE flags indicating the form of access. - * Return 0 if permission is granted. - * @ptrace_traceme: - * Check that the @parent process has sufficient permission to trace the - * current process before allowing the current process to present itself - * to the @parent process for tracing. - * @parent contains the task_struct structure for debugger process. - * Return 0 if permission is granted. - * @capget: - * Get the @effective, @inheritable, and @permitted capability sets for - * the @target process. The hook may also perform permission checking to - * determine if the current process is allowed to see the capability sets - * of the @target process. - * @target contains the task_struct structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if the capability sets were successfully obtained. - * @capset: - * Set the @effective, @inheritable, and @permitted capability sets for - * the current process. - * @new contains the new credentials structure for target process. - * @old contains the current credentials structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 and update @new if permission is granted. - * @capable: - * Check whether the @tsk process has the @cap capability in the indicated - * credentials. - * @cred contains the credentials to use. - * @ns contains the user namespace we want the capability in - * @cap contains the capability . - * @audit: Whether to write an audit message or not - * Return 0 if the capability is granted for @tsk. - * @syslog: - * Check permission before accessing the kernel message ring or changing - * logging to the console. - * See the syslog(2) manual page for an explanation of the @type values. - * @type contains the type of action. - * @from_file indicates the context of action (if it came from /proc). - * Return 0 if permission is granted. - * @settime: - * Check permission to change the system time. - * struct timespec and timezone are defined in include/linux/time.h - * @ts contains new time - * @tz contains new timezone - * Return 0 if permission is granted. - * @vm_enough_memory: - * Check permissions for allocating a new virtual mapping. - * @mm contains the mm struct it is being added to. - * @pages contains the number of pages. - * Return 0 if permission is granted. - * - * @ismaclabel: - * Check if the extended attribute specified by @name - * represents a MAC label. Returns 1 if name is a MAC - * attribute otherwise returns 0. - * @name full extended attribute name to check against - * LSM as a MAC label. - * - * @secid_to_secctx: - * Convert secid to security context. If secdata is NULL the length of - * the result will be returned in seclen, but no secdata will be returned. - * This does mean that the length could change between calls to check the - * length and the next call which actually allocates and returns the secdata. - * @secid contains the security ID. - * @secdata contains the pointer that stores the converted security context. - * @seclen pointer which contains the length of the data - * @secctx_to_secid: - * Convert security context to secid. - * @secid contains the pointer to the generated security ID. - * @secdata contains the security context. - * - * @release_secctx: - * Release the security context. - * @secdata contains the security context. - * @seclen contains the length of the security context. - * - * Security hooks for Audit - * - * @audit_rule_init: - * Allocate and initialize an LSM audit rule structure. - * @field contains the required Audit action. Fields flags are defined in include/linux/audit.h - * @op contains the operator the rule uses. - * @rulestr contains the context where the rule will be applied to. - * @lsmrule contains a pointer to receive the result. - * Return 0 if @lsmrule has been successfully set, - * -EINVAL in case of an invalid rule. - * - * @audit_rule_known: - * Specifies whether given @rule contains any fields related to current LSM. - * @rule contains the audit rule of interest. - * Return 1 in case of relation found, 0 otherwise. - * - * @audit_rule_match: - * Determine if given @secid matches a rule previously approved - * by @audit_rule_known. - * @secid contains the security id in question. - * @field contains the field which relates to current LSM. - * @op contains the operator that will be used for matching. - * @rule points to the audit rule that will be checked against. - * @actx points to the audit context associated with the check. - * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. - * - * @audit_rule_free: - * Deallocate the LSM audit rule structure previously allocated by - * audit_rule_init. - * @rule contains the allocated rule - * - * @inode_notifysecctx: - * Notify the security module of what the security context of an inode - * should be. Initializes the incore security context managed by the - * security module for this inode. Example usage: NFS client invokes - * this hook to initialize the security context in its incore inode to the - * value provided by the server for the file when the server returned the - * file's attributes to the client. - * - * Must be called with inode->i_mutex locked. - * - * @inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. - * - * @inode_setsecctx: - * Change the security context of an inode. Updates the - * incore security context managed by the security module and invokes the - * fs code as needed (via __vfs_setxattr_noperm) to update any backing - * xattrs that represent the context. Example usage: NFS server invokes - * this hook to change the security context in its incore inode and on the - * backing filesystem to a value provided by the client on a SETATTR - * operation. - * - * Must be called with inode->i_mutex locked. - * - * @dentry contains the inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. - * - * @inode_getsecctx: - * On success, returns 0 and fills out @ctx and @ctxlen with the security - * context for the given @inode. - * - * @inode we wish to get the security context of. - * @ctx is a pointer in which to place the allocated security context. - * @ctxlen points to the place to put the length of @ctx. - * This is the main security structure. - */ - /* prototypes */ extern int security_init(void); -- cgit v1.2.3 From e20b043a6902ecb61c2c84355c3bae5149f391db Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 2 May 2015 15:11:36 -0700 Subject: LSM: Add security module hook list heads Add a list header for each security hook. They aren't used until later in the patch series. They are grouped together in a structure so that there doesn't need to be an external address for each. Macro-ize the initialization of the security_operations for each security module in anticipation of changing out the security_operations structure. Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Kees Cook Acked-by: Paul Moore Acked-by: Stephen Smalley Acked-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 220 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index b4c91de510c2..27dd6fcacccc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1626,6 +1626,226 @@ struct security_operations { #endif /* CONFIG_AUDIT */ }; +struct security_hook_heads { + struct list_head binder_set_context_mgr; + struct list_head binder_transaction; + struct list_head binder_transfer_binder; + struct list_head binder_transfer_file; + struct list_head ptrace_access_check; + struct list_head ptrace_traceme; + struct list_head capget; + struct list_head capset; + struct list_head capable; + struct list_head quotactl; + struct list_head quota_on; + struct list_head syslog; + struct list_head settime; + struct list_head vm_enough_memory; + struct list_head bprm_set_creds; + struct list_head bprm_check_security; + struct list_head bprm_secureexec; + struct list_head bprm_committing_creds; + struct list_head bprm_committed_creds; + struct list_head sb_alloc_security; + struct list_head sb_free_security; + struct list_head sb_copy_data; + struct list_head sb_remount; + struct list_head sb_kern_mount; + struct list_head sb_show_options; + struct list_head sb_statfs; + struct list_head sb_mount; + struct list_head sb_umount; + struct list_head sb_pivotroot; + struct list_head sb_set_mnt_opts; + struct list_head sb_clone_mnt_opts; + struct list_head sb_parse_opts_str; + struct list_head dentry_init_security; +#ifdef CONFIG_SECURITY_PATH + struct list_head path_unlink; + struct list_head path_mkdir; + struct list_head path_rmdir; + struct list_head path_mknod; + struct list_head path_truncate; + struct list_head path_symlink; + struct list_head path_link; + struct list_head path_rename; + struct list_head path_chmod; + struct list_head path_chown; + struct list_head path_chroot; +#endif + struct list_head inode_alloc_security; + struct list_head inode_free_security; + struct list_head inode_init_security; + struct list_head inode_create; + struct list_head inode_link; + struct list_head inode_unlink; + struct list_head inode_symlink; + struct list_head inode_mkdir; + struct list_head inode_rmdir; + struct list_head inode_mknod; + struct list_head inode_rename; + struct list_head inode_readlink; + struct list_head inode_follow_link; + struct list_head inode_permission; + struct list_head inode_setattr; + struct list_head inode_getattr; + struct list_head inode_setxattr; + struct list_head inode_post_setxattr; + struct list_head inode_getxattr; + struct list_head inode_listxattr; + struct list_head inode_removexattr; + struct list_head inode_need_killpriv; + struct list_head inode_killpriv; + struct list_head inode_getsecurity; + struct list_head inode_setsecurity; + struct list_head inode_listsecurity; + struct list_head inode_getsecid; + struct list_head file_permission; + struct list_head file_alloc_security; + struct list_head file_free_security; + struct list_head file_ioctl; + struct list_head mmap_addr; + struct list_head mmap_file; + struct list_head file_mprotect; + struct list_head file_lock; + struct list_head file_fcntl; + struct list_head file_set_fowner; + struct list_head file_send_sigiotask; + struct list_head file_receive; + struct list_head file_open; + struct list_head task_create; + struct list_head task_free; + struct list_head cred_alloc_blank; + struct list_head cred_free; + struct list_head cred_prepare; + struct list_head cred_transfer; + struct list_head kernel_act_as; + struct list_head kernel_create_files_as; + struct list_head kernel_fw_from_file; + struct list_head kernel_module_request; + struct list_head kernel_module_from_file; + struct list_head task_fix_setuid; + struct list_head task_setpgid; + struct list_head task_getpgid; + struct list_head task_getsid; + struct list_head task_getsecid; + struct list_head task_setnice; + struct list_head task_setioprio; + struct list_head task_getioprio; + struct list_head task_setrlimit; + struct list_head task_setscheduler; + struct list_head task_getscheduler; + struct list_head task_movememory; + struct list_head task_kill; + struct list_head task_wait; + struct list_head task_prctl; + struct list_head task_to_inode; + struct list_head ipc_permission; + struct list_head ipc_getsecid; + struct list_head msg_msg_alloc_security; + struct list_head msg_msg_free_security; + struct list_head msg_queue_alloc_security; + struct list_head msg_queue_free_security; + struct list_head msg_queue_associate; + struct list_head msg_queue_msgctl; + struct list_head msg_queue_msgsnd; + struct list_head msg_queue_msgrcv; + struct list_head shm_alloc_security; + struct list_head shm_free_security; + struct list_head shm_associate; + struct list_head shm_shmctl; + struct list_head shm_shmat; + struct list_head sem_alloc_security; + struct list_head sem_free_security; + struct list_head sem_associate; + struct list_head sem_semctl; + struct list_head sem_semop; + struct list_head netlink_send; + struct list_head d_instantiate; + struct list_head getprocattr; + struct list_head setprocattr; + struct list_head ismaclabel; + struct list_head secid_to_secctx; + struct list_head secctx_to_secid; + struct list_head release_secctx; + struct list_head inode_notifysecctx; + struct list_head inode_setsecctx; + struct list_head inode_getsecctx; +#ifdef CONFIG_SECURITY_NETWORK + struct list_head unix_stream_connect; + struct list_head unix_may_send; + struct list_head socket_create; + struct list_head socket_post_create; + struct list_head socket_bind; + struct list_head socket_connect; + struct list_head socket_listen; + struct list_head socket_accept; + struct list_head socket_sendmsg; + struct list_head socket_recvmsg; + struct list_head socket_getsockname; + struct list_head socket_getpeername; + struct list_head socket_getsockopt; + struct list_head socket_setsockopt; + struct list_head socket_shutdown; + struct list_head socket_sock_rcv_skb; + struct list_head socket_getpeersec_stream; + struct list_head socket_getpeersec_dgram; + struct list_head sk_alloc_security; + struct list_head sk_free_security; + struct list_head sk_clone_security; + struct list_head sk_getsecid; + struct list_head sock_graft; + struct list_head inet_conn_request; + struct list_head inet_csk_clone; + struct list_head inet_conn_established; + struct list_head secmark_relabel_packet; + struct list_head secmark_refcount_inc; + struct list_head secmark_refcount_dec; + struct list_head req_classify_flow; + struct list_head tun_dev_alloc_security; + struct list_head tun_dev_free_security; + struct list_head tun_dev_create; + struct list_head tun_dev_attach_queue; + struct list_head tun_dev_attach; + struct list_head tun_dev_open; + struct list_head skb_owned_by; +#endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + struct list_head xfrm_policy_alloc_security; + struct list_head xfrm_policy_clone_security; + struct list_head xfrm_policy_free_security; + struct list_head xfrm_policy_delete_security; + struct list_head xfrm_state_alloc; + struct list_head xfrm_state_alloc_acquire; + struct list_head xfrm_state_free_security; + struct list_head xfrm_state_delete_security; + struct list_head xfrm_policy_lookup; + struct list_head xfrm_state_pol_flow_match; + struct list_head xfrm_decode_session; +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_KEYS + struct list_head key_alloc; + struct list_head key_free; + struct list_head key_permission; + struct list_head key_getsecurity; +#endif /* CONFIG_KEYS */ +#ifdef CONFIG_AUDIT + struct list_head audit_rule_init; + struct list_head audit_rule_known; + struct list_head audit_rule_match; + struct list_head audit_rule_free; +#endif /* CONFIG_AUDIT */ +}; + +/* + * Initializing a security_hook_list structure takes + * up a lot of space in a source file. This macro takes + * care of the common case and reduces the amount of + * text involved. + * Casey says: Comment is true in the next patch. + */ +#define LSM_HOOK_INIT(HEAD, HOOK) .HEAD = HOOK + /* prototypes */ extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -- cgit v1.2.3 From b1d9e6b0646d0e5ee5d9050bd236b6c65d66faef Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 2 May 2015 15:11:42 -0700 Subject: LSM: Switch to lists of hooks Instead of using a vector of security operations with explicit, special case stacking of the capability and yama hooks use lists of hooks with capability and yama hooks included as appropriate. The security_operations structure is no longer required. Instead, there is a union of the function pointers that allows all the hooks lists to use a common mechanism for list management while retaining typing. Each module supplies an array describing the hooks it provides instead of a sparsely populated security_operations structure. The description includes the element that gets put on the hook list, avoiding the issues surrounding individual element allocation. The method for registering security modules is changed to reflect the information available. The method for removing a module, currently only used by SELinux, has also changed. It should be generic now, however if there are potential race conditions based on ordering of hook removal that needs to be addressed by the calling module. The security hooks are called from the lists and the first failure is returned. Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Kees Cook Acked-by: Paul Moore Acked-by: Stephen Smalley Acked-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 77 ++++++++++++++++++++++++++++++++--------------- include/linux/security.h | 46 +++------------------------- 2 files changed, 57 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 27dd6fcacccc..f014f2596e22 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -25,21 +25,10 @@ #define __LINUX_LSM_HOOKS_H #include - -/* Maximum number of letters for an LSM name string */ -#define SECURITY_NAME_MAX 10 - -#ifdef CONFIG_SECURITY +#include +#include /** - * struct security_operations - main security structure - * - * Security module identifier. - * - * @name: - * A string that acts as a unique identifier for the LSM with max number - * of characters = SECURITY_NAME_MAX. - * * Security hooks for program execution operations. * * @bprm_set_creds: @@ -1310,9 +1299,7 @@ * This is the main security structure. */ -struct security_operations { - char name[SECURITY_NAME_MAX + 1]; - +union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); int (*binder_transaction)(struct task_struct *from, struct task_struct *to); @@ -1837,21 +1824,63 @@ struct security_hook_heads { #endif /* CONFIG_AUDIT */ }; +/* + * Security module hook list structure. + * For use with generic list macros for common operations. + */ +struct security_hook_list { + struct list_head list; + struct list_head *head; + union security_list_options hook; +}; + /* * Initializing a security_hook_list structure takes * up a lot of space in a source file. This macro takes * care of the common case and reduces the amount of * text involved. - * Casey says: Comment is true in the next patch. */ -#define LSM_HOOK_INIT(HEAD, HOOK) .HEAD = HOOK +#define LSM_HOOK_INIT(HEAD, HOOK) \ + { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } + +extern struct security_hook_heads security_hook_heads; + +static inline void security_add_hooks(struct security_hook_list *hooks, + int count) +{ + int i; -/* prototypes */ -extern int security_module_enable(struct security_operations *ops); -extern int register_security(struct security_operations *ops); -extern void __init security_fixup_ops(struct security_operations *ops); -extern void reset_security_ops(void); + for (i = 0; i < count; i++) + list_add_tail_rcu(&hooks[i].list, hooks[i].head); +} -#endif /* CONFIG_SECURITY */ +#ifdef CONFIG_SECURITY_SELINUX_DISABLE +/* + * Assuring the safety of deleting a security module is up to + * the security module involved. This may entail ordering the + * module's hook list in a particular way, refusing to disable + * the module once a policy is loaded or any number of other + * actions better imagined than described. + * + * The name of the configuration option reflects the only module + * that currently uses the mechanism. Any developer who thinks + * disabling their module is a good idea needs to be at least as + * careful as the SELinux team. + */ +static inline void security_delete_hooks(struct security_hook_list *hooks, + int count) +{ + int i; + + for (i = 0; i < count; i++) + list_del_rcu(&hooks[i].list); +} +#endif /* CONFIG_SECURITY_SELINUX_DISABLE */ + +extern int __init security_module_enable(const char *module); +extern void __init capability_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA_STACKED +void __init yama_add_hooks(void); +#endif #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/security.h b/include/linux/security.h index a2a100e7ac6e..8c8175d41b4c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -27,6 +27,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -54,9 +55,6 @@ struct xattr; struct xfrm_sec_ctx; struct mm_struct; -/* Maximum number of letters for an LSM name string */ -#define SECURITY_NAME_MAX 10 - /* If capable should audit the security request */ #define SECURITY_CAP_NOAUDIT 0 #define SECURITY_CAP_AUDIT 1 @@ -69,10 +67,7 @@ struct audit_krule; struct user_namespace; struct timezone; -/* - * These functions are in security/capability.c and are used - * as the default capabilities functions - */ +/* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); extern int cap_settime(const struct timespec *ts, const struct timezone *tz); @@ -114,8 +109,6 @@ struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); - #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; @@ -472,7 +465,7 @@ static inline int security_settime(const struct timespec *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(mm, pages); + return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_set_creds(struct linux_binprm *bprm) @@ -1075,7 +1068,7 @@ static inline int security_setprocattr(struct task_struct *p, char *name, void * static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) { - return cap_netlink_send(sk, skb); + return 0; } static inline int security_ismaclabel(const char *name) @@ -1643,36 +1636,5 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#ifdef CONFIG_SECURITY_YAMA -extern int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode); -extern int yama_ptrace_traceme(struct task_struct *parent); -extern void yama_task_free(struct task_struct *task); -extern int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); -#else -static inline int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode) -{ - return 0; -} - -static inline int yama_ptrace_traceme(struct task_struct *parent) -{ - return 0; -} - -static inline void yama_task_free(struct task_struct *task) -{ -} - -static inline int yama_task_prctl(int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - return -ENOSYS; -} -#endif /* CONFIG_SECURITY_YAMA */ - #endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.3 From 6a4b6b0a3b55f23f4cc9ad85a1539c581bcb0874 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 May 2015 17:10:31 +0200 Subject: gpio: sysfs: clean up chip class-device handling Clean gpio-chip class device registration and deregistration. The class device is registered when a gpio-chip is added (or from gpiolib_sysfs_init post-core init call), and deregistered when the chip is removed. Store the class device in struct gpio_chip directly rather than do a class-device lookup on deregistration. This also removes the need for the exported flag. Signed-off-by: Johan Hovold Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f1b36593ec9f..2c1e639f66bd 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -20,6 +20,7 @@ struct seq_file; * struct gpio_chip - abstract a GPIO controller * @label: for diagnostics * @dev: optional device providing the GPIOs + * @cdev: class device used by sysfs interface (may be NULL) * @owner: helps prevent removal of modules exporting active GPIOs * @list: links gpio_chips together for traversal * @request: optional hook for chip-specific activation, such as @@ -57,7 +58,6 @@ struct seq_file; * implies that if the chip supports IRQs, these IRQs need to be threaded * as the chip access may sleep when e.g. reading out the IRQ status * registers. - * @exported: flags if the gpiochip is exported for use from sysfs. Private. * @irq_not_threaded: flag must be set if @can_sleep is set but the * IRQs don't need to be threaded * @@ -74,6 +74,7 @@ struct seq_file; struct gpio_chip { const char *label; struct device *dev; + struct device *cdev; struct module *owner; struct list_head list; @@ -109,7 +110,6 @@ struct gpio_chip { const char *const *names; bool can_sleep; bool irq_not_threaded; - bool exported; #ifdef CONFIG_GPIOLIB_IRQCHIP /* -- cgit v1.2.3 From 166a85e44245d771bd7042f3ad72aa0e12bb53bd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 May 2015 17:10:33 +0200 Subject: gpio: remove gpiod_sysfs_set_active_low Remove gpiod_sysfs_set_active_low (and gpio_sysfs_set_active_low) which allowed code to change the polarity of a gpio line even after it had been exported through sysfs. Drivers should not care, and generally does not know, about gpio-line polarity which is a hardware feature that needs to be described by firmware. It is currently possible to define gpio-line polarity in device-tree and acpi firmware or using platform data. Userspace can also change the polarity through sysfs. Note that drivers using the legacy gpio interface could still use GPIOF_ACTIVE_LOW to change the polarity before exporting the gpio. There are no in-kernel users of this interface. Cc: Jonathan Corbet Cc: Harry Wei Cc: Arnd Bergmann Cc: linux-doc@vger.kernel.org Cc: linux-kernel@zh-kernel.org Cc: linux-arch@vger.kernel.org Signed-off-by: Johan Hovold Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio.h | 7 ------- include/linux/gpio/consumer.h | 6 ------ 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index ab81339a8590..d12b5d566e4b 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -196,13 +196,6 @@ static inline int gpio_export_link(struct device *dev, const char *name, return -EINVAL; } -static inline int gpio_sysfs_set_active_low(unsigned gpio, int value) -{ - /* GPIO can never have been requested */ - WARN_ON(1); - return -EINVAL; -} - static inline void gpio_unexport(unsigned gpio) { /* GPIO can never have been exported */ diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3a7c9ffd5ab9..09a7fb0062a6 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -449,7 +449,6 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) int gpiod_export(struct gpio_desc *desc, bool direction_may_change); int gpiod_export_link(struct device *dev, const char *name, struct gpio_desc *desc); -int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value); void gpiod_unexport(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */ @@ -466,11 +465,6 @@ static inline int gpiod_export_link(struct device *dev, const char *name, return -ENOSYS; } -static inline int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value) -{ - return -ENOSYS; -} - static inline void gpiod_unexport(struct gpio_desc *desc) { } -- cgit v1.2.3 From 2d94e5224e81c58986a8cf44a3bf4830ce5cb96e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 May 2015 14:26:34 -0700 Subject: HID: hid-sensor-hub: Fix debug lock warning When CONFIG_DEBUG_LOCK_ALLOC is defined, mutex magic is compared and warned for (l->magic != l), here l is the address of mutex pointer. In hid-sensor-hub as part of hsdev creation, a per hsdev mutex is initialized during MFD cell creation. This hsdev, which contains, mutex is part of platform data for the a cell. But platform_data is copied in platform_device_add_data() in platform.c. This copy will copy the whole hsdev structure including mutex. But once copied the magic will no longer match. So when client driver call sensor_hub_input_attr_get_raw_value, this will trigger mutex warning. So to avoid this allocate mutex dynamically. This will be same even after copy. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- include/linux/hid-sensor-hub.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..0042bf330b99 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -74,7 +74,7 @@ struct sensor_hub_pending { * @usage: Usage id for this hub device instance. * @start_collection_index: Starting index for a phy type collection * @end_collection_index: Last index for a phy type collection - * @mutex: synchronizing mutex. + * @mutex_ptr: synchronizing mutex pointer. * @pending: Holds information of pending sync read request. */ struct hid_sensor_hub_device { @@ -84,7 +84,7 @@ struct hid_sensor_hub_device { u32 usage; int start_collection_index; int end_collection_index; - struct mutex mutex; + struct mutex *mutex_ptr; struct sensor_hub_pending pending; }; -- cgit v1.2.3 From 0e39250845c0f91acc64264709b25f7f9b85c2c3 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:51 -0700 Subject: net: Store virtual address instead of page in netdev_alloc_cache This change makes it so that we store the virtual address of the page in the netdev_alloc_cache instead of the page pointer. The idea behind this is to avoid multiple calls to page_address since the virtual address is required for every access, but the page pointer is only needed at allocation or reset of the page. While I was at it I also reordered the netdev_alloc_cache structure a bit so that the size is always 16 bytes by dropping size in the case where PAGE_SIZE is greater than or equal to 32KB. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c2f793573fa..8b9a2c35a9d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE +#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) void *netdev_alloc_frag(unsigned int fragsz); -- cgit v1.2.3 From b63ae8ca096dfdbfeef6a209c30a93a966518853 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:57 -0700 Subject: mm/net: Rename and move page fragment handling from net/ to mm/ This change moves the __alloc_page_frag functionality out of the networking stack and into the page allocation portion of mm. The idea it so help make this maintainable by placing it with other page allocation functions. Since we are moving it from skbuff.c to page_alloc.c I have also renamed the basic defines and structure from netdev_alloc_cache to page_frag_cache to reflect that this is now part of a different kernel subsystem. I have also added a simple __free_page_frag function which can handle freeing the frags based on the skb->head pointer. The model for this is based off of __free_pages since we don't actually need to deal with all of the cases that put_page handles. I incorporated the virt_to_head_page call and compound_order into the function as it actually allows for a signficant size reduction by reducing code duplication. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/gfp.h | 5 +++++ include/linux/mm_types.h | 18 ++++++++++++++++++ include/linux/skbuff.h | 3 --- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a9373e61e8..70a7fee1efb3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, bool cold); extern void free_hot_cold_page_list(struct list_head *list, bool cold); +struct page_frag_cache; +extern void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask); +extern void __free_page_frag(void *addr); + extern void __free_kmem_pages(struct page *page, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007..0038ac7466fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,24 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) + +struct page_frag_cache { + void * va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + typedef unsigned long __nocast vm_flags_t; /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b9a2c35a9d7..0039fcc45b3b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) - void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, -- cgit v1.2.3 From 181edb2bfa22b50817684135ab6430ed2808abf0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:12:03 -0700 Subject: net: Add skb_free_frag to replace use of put_page in freeing skb->head This change adds a function called skb_free_frag which is meant to compliment the function netdev_alloc_frag. The general idea is to enable a more lightweight version of page freeing since we don't actually need all the overhead of a put_page, and we don't quite fit the model of __free_pages. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0039fcc45b3b..c0b574a414e7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2182,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); +} + void *napi_alloc_frag(unsigned int fragsz); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); -- cgit v1.2.3 From 755a27e7e4c817dd51ade41668b380f26026899c Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 3 May 2015 18:18:02 +0800 Subject: tracing: remove unused ftrace_output_event() prototype The prototype of ftrace_output_event was added by commit 1d6bae966e90 ("tracing: Move raw output code from macro to standalone function") but this function was not defined anywhere, and is still nowhere to be found. Link: http://lkml.kernel.org/r/1430648282-25792-1-git-send-email-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f9ecf63d47f1..65ce6de91307 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -219,9 +219,6 @@ struct ftrace_event_class { extern int ftrace_event_reg(struct ftrace_event_call *event, enum trace_reg type, void *data); -int ftrace_output_event(struct trace_iterator *iter, struct ftrace_event_call *event, - char *fmt, ...); - int ftrace_event_define_field(struct ftrace_event_call *call, char *type, int len, char *item, int offset, int field_size, int sign, int filter); -- cgit v1.2.3 From 336b7e1f230912cd8df2497be8dd7be4647d8fc8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 May 2015 14:06:32 -0400 Subject: block: remove export for blk_queue_bio With commit ff36ab345 ("dm: remove request-based logic from make_request_fn wrapper") DM no longer calls blk_queue_bio() directly, so remove its export. Doing so required a forward declaration in blk-core.c. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..5d93a6645e88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -821,8 +821,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern void blk_queue_bio(struct request_queue *q, struct bio *bio); - /* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be -- cgit v1.2.3 From 020af89a41c41fd2c92d0da524968dfaba6269f0 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 27 Apr 2015 21:24:30 +0300 Subject: PM / sleep: Add macro to define common noirq system PM callbacks The same approach is used as for the existing SET_SYSTEM_SLEEP_PM_OPS, but for noirq callbacks. New SET_NOIRQ_SYSTEM_SLEEP_PM_OPS, defined for CONFIG_PM_SLEEP, will point ->suspend_noirq, ->freeze_noirq and ->poweroff_noirq to the same function. Vice versa happens for ->resume_noirq, ->thaw_noirq and ->restore_noirq. Signed-off-by: Grygorii Strashko Acked-by: Santosh Shilimkar Reviewed-by: Ulf Hansson Acked-by: Pavel Machek Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 2d29c64f8fb1..4890743892ef 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -342,6 +342,18 @@ struct dev_pm_ops { #define SET_LATE_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) #endif +#ifdef CONFIG_PM_SLEEP +#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend_noirq = suspend_fn, \ + .resume_noirq = resume_fn, \ + .freeze_noirq = suspend_fn, \ + .thaw_noirq = resume_fn, \ + .poweroff_noirq = suspend_fn, \ + .restore_noirq = resume_fn, +#else +#define SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) +#endif + #ifdef CONFIG_PM #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ .runtime_suspend = suspend_fn, \ -- cgit v1.2.3 From 75f504004ab866c8f84749303b0f70953724e259 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Thu, 23 Apr 2015 14:03:09 +0530 Subject: PM / clock_ops: Provide default runtime ops to users Most users of PM clocks do the extact same things in the runtime suspend/resume callbacks. Provide them USE_PM_CLK_RUNTIME_OPS so as to avoid/remove boilerplate code. Signed-off-by: Rajendra Nayak Reviewed-by: Kevin Hilman Acked-by: Santosh Shilimkar Acked-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- include/linux/pm_clock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 0b0039634410..25266c600021 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -20,6 +20,16 @@ struct pm_clk_notifier_block { struct clk; +#ifdef CONFIG_PM +extern int pm_clk_runtime_suspend(struct device *dev); +extern int pm_clk_runtime_resume(struct device *dev); +#define USE_PM_CLK_RUNTIME_OPS \ + .runtime_suspend = pm_clk_runtime_suspend, \ + .runtime_resume = pm_clk_runtime_resume, +#else +#define USE_PM_CLK_RUNTIME_OPS +#endif + #ifdef CONFIG_PM_CLK static inline bool pm_clk_no_clocks(struct device *dev) { -- cgit v1.2.3 From 9d47c0a2d958e06322c88245749278633d333cca Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:47 -0700 Subject: switchdev: s/swdev_/switchdev_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a6d706b2a947..2b39235b9f13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1567,7 +1567,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_SWITCHDEV - const struct swdev_ops *swdev_ops; + const struct switchdev_ops *switchdev_ops; #endif const struct header_ops *header_ops; -- cgit v1.2.3 From 7889cbee8357aaed85898d028829dfb4f75bae2c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:07 -0700 Subject: switchdev: remove NETIF_F_HW_SWITCH_OFFLOAD feature flag Roopa said remove the feature flag for this series and she'll work on bringing it back if needed at a later date. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7d59dc6ab789..9672781c593d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,7 +66,6 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ - NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -125,7 +124,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -161,8 +159,7 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_HW_SWITCH_OFFLOAD) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) /* * If one device doesn't support one of these features, then disable it -- cgit v1.2.3 From 5d1d65f8bea6de3d9c2c60fdfdd2da02da5ea672 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 11 May 2015 17:48:12 +0800 Subject: crypto: aead - Convert top level interface to new style This patch converts the top-level aead interface to the new style. All user-level AEAD interface code have been moved into crypto/aead.h. The allocation/free functions have switched over to the new way of allocating tfms. This patch also removes the double indrection on setkey so the indirection now exists only at the alg level. Apart from these there are no user-visible changes. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 442 +------------------------------------------------ 1 file changed, 1 insertion(+), 441 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ee14140f8893..59ca4086ce6a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -140,6 +140,7 @@ struct crypto_blkcipher; struct crypto_hash; struct crypto_tfm; struct crypto_type; +struct aead_request; struct aead_givcrypt_request; struct skcipher_givcrypt_request; @@ -174,32 +175,6 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -/** - * struct aead_request - AEAD request - * @base: Common attributes for async crypto requests - * @assoclen: Length in bytes of associated data for authentication - * @cryptlen: Length of data to be encrypted or decrypted - * @iv: Initialisation vector - * @assoc: Associated data - * @src: Source data - * @dst: Destination data - * @__ctx: Start of private context data - */ -struct aead_request { - struct crypto_async_request base; - - unsigned int assoclen; - unsigned int cryptlen; - - u8 *iv; - - struct scatterlist *assoc; - struct scatterlist *src; - struct scatterlist *dst; - - void *__ctx[] CRYPTO_MINALIGN_ATTR; -}; - struct blkcipher_desc { struct crypto_blkcipher *tfm; void *info; @@ -572,21 +547,6 @@ struct ablkcipher_tfm { unsigned int reqsize; }; -struct aead_tfm { - int (*setkey)(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct aead_request *req); - int (*decrypt)(struct aead_request *req); - int (*givencrypt)(struct aead_givcrypt_request *req); - int (*givdecrypt)(struct aead_givcrypt_request *req); - - struct crypto_aead *base; - - unsigned int ivsize; - unsigned int authsize; - unsigned int reqsize; -}; - struct blkcipher_tfm { void *iv; int (*setkey)(struct crypto_tfm *tfm, const u8 *key, @@ -626,7 +586,6 @@ struct compress_tfm { }; #define crt_ablkcipher crt_u.ablkcipher -#define crt_aead crt_u.aead #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash @@ -638,7 +597,6 @@ struct crypto_tfm { union { struct ablkcipher_tfm ablkcipher; - struct aead_tfm aead; struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; @@ -656,10 +614,6 @@ struct crypto_ablkcipher { struct crypto_tfm base; }; -struct crypto_aead { - struct crypto_tfm base; -}; - struct crypto_blkcipher { struct crypto_tfm base; }; @@ -1151,400 +1105,6 @@ static inline void ablkcipher_request_set_crypt( req->info = iv; } -/** - * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API - * - * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD - * (listed as type "aead" in /proc/crypto) - * - * The most prominent examples for this type of encryption is GCM and CCM. - * However, the kernel supports other types of AEAD ciphers which are defined - * with the following cipher string: - * - * authenc(keyed message digest, block cipher) - * - * For example: authenc(hmac(sha256), cbc(aes)) - * - * The example code provided for the asynchronous block cipher operation - * applies here as well. Naturally all *ablkcipher* symbols must be exchanged - * the *aead* pendants discussed in the following. In addtion, for the AEAD - * operation, the aead_request_set_assoc function must be used to set the - * pointer to the associated data memory location before performing the - * encryption or decryption operation. In case of an encryption, the associated - * data memory is filled during the encryption operation. For decryption, the - * associated data memory must contain data that is used to verify the integrity - * of the decrypted data. Another deviation from the asynchronous block cipher - * operation is that the caller should explicitly check for -EBADMSG of the - * crypto_aead_decrypt. That error indicates an authentication error, i.e. - * a breach in the integrity of the message. In essence, that -EBADMSG error - * code is the key bonus an AEAD cipher has over "standard" block chaining - * modes. - */ - -static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_aead *)tfm; -} - -/** - * crypto_alloc_aead() - allocate AEAD cipher handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * AEAD cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for an AEAD. The returned struct - * crypto_aead is the cipher handle that is required for any subsequent - * API invocation for that AEAD. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); - -static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_aead() - zeroize and free aead handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_aead(struct crypto_aead *tfm) -{ - crypto_free_tfm(crypto_aead_tfm(tfm)); -} - -static inline struct aead_tfm *crypto_aead_crt(struct crypto_aead *tfm) -{ - return &crypto_aead_tfm(tfm)->crt_aead; -} - -/** - * crypto_aead_ivsize() - obtain IV size - * @tfm: cipher handle - * - * The size of the IV for the aead referenced by the cipher handle is - * returned. This IV size may be zero if the cipher does not need an IV. - * - * Return: IV size in bytes - */ -static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->ivsize; -} - -/** - * crypto_aead_authsize() - obtain maximum authentication data size - * @tfm: cipher handle - * - * The maximum size of the authentication data for the AEAD cipher referenced - * by the AEAD cipher handle is returned. The authentication data size may be - * zero if the cipher implements a hard-coded maximum. - * - * The authentication data may also be known as "tag value". - * - * Return: authentication data size / tag size in bytes - */ -static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->authsize; -} - -/** - * crypto_aead_blocksize() - obtain block size of cipher - * @tfm: cipher handle - * - * The block size for the AEAD referenced with the cipher handle is returned. - * The caller may use that information to allocate appropriate memory for the - * data returned by the encryption or decryption operation - * - * Return: block size of cipher - */ -static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); -} - -static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); -} - -static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) -{ - return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); -} - -static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); -} - -static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); -} - -/** - * crypto_aead_setkey() - set key for cipher - * @tfm: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the AEAD referenced by the cipher - * handle. - * - * Note, the key length determines the cipher type. Many block ciphers implement - * different cipher modes depending on the key size, such as AES-128 vs AES-192 - * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 - * is performed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -static inline int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen) -{ - struct aead_tfm *crt = crypto_aead_crt(tfm); - - return crt->setkey(crt->base, key, keylen); -} - -/** - * crypto_aead_setauthsize() - set authentication data size - * @tfm: cipher handle - * @authsize: size of the authentication data / tag in bytes - * - * Set the authentication data size / tag size. AEAD requires an authentication - * tag (or MAC) in addition to the associated data. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); - -static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) -{ - return __crypto_aead_cast(req->base.tfm); -} - -/** - * crypto_aead_encrypt() - encrypt plaintext - * @req: reference to the aead_request handle that holds all information - * needed to perform the cipher operation - * - * Encrypt plaintext data using the aead_request handle. That data structure - * and how it is filled with data is discussed with the aead_request_* - * functions. - * - * IMPORTANT NOTE The encryption operation creates the authentication data / - * tag. That data is concatenated with the created ciphertext. - * The ciphertext memory size is therefore the given number of - * block cipher blocks + the size defined by the - * crypto_aead_setauthsize invocation. The caller must ensure - * that sufficient memory is available for the ciphertext and - * the authentication tag. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_aead_encrypt(struct aead_request *req) -{ - return crypto_aead_crt(crypto_aead_reqtfm(req))->encrypt(req); -} - -/** - * crypto_aead_decrypt() - decrypt ciphertext - * @req: reference to the ablkcipher_request handle that holds all information - * needed to perform the cipher operation - * - * Decrypt ciphertext data using the aead_request handle. That data structure - * and how it is filled with data is discussed with the aead_request_* - * functions. - * - * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the - * authentication data / tag. That authentication data / tag - * must have the size defined by the crypto_aead_setauthsize - * invocation. - * - * - * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD - * cipher operation performs the authentication of the data during the - * decryption operation. Therefore, the function returns this error if - * the authentication of the ciphertext was unsuccessful (i.e. the - * integrity of the ciphertext or the associated data was violated); - * < 0 if an error occurred. - */ -static inline int crypto_aead_decrypt(struct aead_request *req) -{ - if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req))) - return -EINVAL; - - return crypto_aead_crt(crypto_aead_reqtfm(req))->decrypt(req); -} - -/** - * DOC: Asynchronous AEAD Request Handle - * - * The aead_request data structure contains all pointers to data required for - * the AEAD cipher operation. This includes the cipher handle (which can be - * used by multiple aead_request instances), pointer to plaintext and - * ciphertext, asynchronous callback function, etc. It acts as a handle to the - * aead_request_* API calls in a similar way as AEAD handle to the - * crypto_aead_* API calls. - */ - -/** - * crypto_aead_reqsize() - obtain size of the request data structure - * @tfm: cipher handle - * - * Return: number of bytes - */ -static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->reqsize; -} - -/** - * aead_request_set_tfm() - update cipher handle reference in request - * @req: request handle to be modified - * @tfm: cipher handle that shall be added to the request handle - * - * Allow the caller to replace the existing aead handle in the request - * data structure with a different one. - */ -static inline void aead_request_set_tfm(struct aead_request *req, - struct crypto_aead *tfm) -{ - req->base.tfm = crypto_aead_tfm(crypto_aead_crt(tfm)->base); -} - -/** - * aead_request_alloc() - allocate request data structure - * @tfm: cipher handle to be registered with the request - * @gfp: memory allocation flag that is handed to kmalloc by the API call. - * - * Allocate the request data structure that must be used with the AEAD - * encrypt and decrypt API calls. During the allocation, the provided aead - * handle is registered in the request data structure. - * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, - gfp_t gfp) -{ - struct aead_request *req; - - req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); - - if (likely(req)) - aead_request_set_tfm(req, tfm); - - return req; -} - -/** - * aead_request_free() - zeroize and free request data structure - * @req: request data structure cipher handle to be freed - */ -static inline void aead_request_free(struct aead_request *req) -{ - kzfree(req); -} - -/** - * aead_request_set_callback() - set asynchronous callback function - * @req: request handle - * @flags: specify zero or an ORing of the flags - * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and - * increase the wait queue beyond the initial maximum size; - * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep - * @compl: callback function pointer to be registered with the request handle - * @data: The data pointer refers to memory that is not used by the kernel - * crypto API, but provided to the callback function for it to use. Here, - * the caller can provide a reference to memory the callback function can - * operate on. As the callback function is invoked asynchronously to the - * related functionality, it may need to access data structures of the - * related functionality which can be referenced using this pointer. The - * callback function can access the memory via the "data" field in the - * crypto_async_request data structure provided to the callback function. - * - * Setting the callback function that is triggered once the cipher operation - * completes - * - * The callback function is registered with the aead_request handle and - * must comply with the following template - * - * void callback_function(struct crypto_async_request *req, int error) - */ -static inline void aead_request_set_callback(struct aead_request *req, - u32 flags, - crypto_completion_t compl, - void *data) -{ - req->base.complete = compl; - req->base.data = data; - req->base.flags = flags; -} - -/** - * aead_request_set_crypt - set data buffers - * @req: request handle - * @src: source scatter / gather list - * @dst: destination scatter / gather list - * @cryptlen: number of bytes to process from @src - * @iv: IV for the cipher operation which must comply with the IV size defined - * by crypto_aead_ivsize() - * - * Setting the source data and destination data scatter / gather lists. - * - * For encryption, the source is treated as the plaintext and the - * destination is the ciphertext. For a decryption operation, the use is - * reversed - the source is the ciphertext and the destination is the plaintext. - * - * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption, - * the caller must concatenate the ciphertext followed by the - * authentication tag and provide the entire data stream to the - * decryption operation (i.e. the data length used for the - * initialization of the scatterlist and the data length for the - * decryption operation is identical). For encryption, however, - * the authentication tag is created while encrypting the data. - * The destination buffer must hold sufficient space for the - * ciphertext and the authentication tag while the encryption - * invocation must only point to the plaintext data size. The - * following code snippet illustrates the memory usage - * buffer = kmalloc(ptbuflen + (enc ? authsize : 0)); - * sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0)); - * aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv); - */ -static inline void aead_request_set_crypt(struct aead_request *req, - struct scatterlist *src, - struct scatterlist *dst, - unsigned int cryptlen, u8 *iv) -{ - req->src = src; - req->dst = dst; - req->cryptlen = cryptlen; - req->iv = iv; -} - -/** - * aead_request_set_assoc() - set the associated data scatter / gather list - * @req: request handle - * @assoc: associated data scatter / gather list - * @assoclen: number of bytes to process from @assoc - * - * For encryption, the memory is filled with the associated data. For - * decryption, the memory must point to the associated data. - */ -static inline void aead_request_set_assoc(struct aead_request *req, - struct scatterlist *assoc, - unsigned int assoclen) -{ - req->assoc = assoc; - req->assoclen = assoclen; -} - /** * DOC: Synchronous Block Cipher API * -- cgit v1.2.3 From a2029240e5836e73ebcc1a8ddb8c22d636f89c9a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 11 May 2015 21:17:53 +0200 Subject: net: deinline netif_tx_stop_all_queues(), remove WARN_ON in netif_tx_stop_queue() These functions compile to 60 bytes of machine code each. With this .config: http://busybox.net/~vda/kernel_config there are 617 calls of netif_tx_stop_queue() and 49 calls of netif_tx_stop_all_queues() in vmlinux. To fix this, remove WARN_ON in netif_tx_stop_queue() as suggested by davem, and deinline netif_tx_stop_all_queues(). Change in code size is about 20k: text data bss dec hex filename 82426986 22255416 20627456 125309858 77813a2 vmlinux.before 82406248 22255416 20627456 125289120 777c2a0 vmlinux gcc-4.7.2 still creates deinlined version of netif_tx_stop_queue sometimes: $ nm --size-sort vmlinux | grep netif_tx_stop_queue | wc -l 190 ffffffff81b558a8 : ffffffff81b558a8: 55 push %rbp ffffffff81b558a9: 48 89 e5 mov %rsp,%rbp ffffffff81b558ac: f0 80 8f e0 01 00 00 lock orb $0x1,0x1e0(%rdi) ffffffff81b558b3: 01 ffffffff81b558b4: 5d pop %rbp ffffffff81b558b5: c3 retq This needs additional fixing. Signed-off-by: Denys Vlasenko CC: Alexei Starovoitov CC: Alexander Duyck CC: Joe Perches CC: David S. Miller CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b39235b9f13..fa57915f440c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2559,10 +2559,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { - if (WARN_ON(!dev_queue)) { - pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); - return; - } set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2578,15 +2574,7 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } -static inline void netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} +void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { -- cgit v1.2.3 From cffc642d93f9324a06dfbd7da9af29652952a248 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 11 May 2015 22:22:44 -0700 Subject: test_bpf: add 173 new testcases for eBPF add an exhaustive set of eBPF tests bringing total to: test_bpf: Summary: 233 PASSED, 0 FAILED, [0/226 JIT'ed] Signed-off-by: Michael Holzheu Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3c03a6085b82..ce1d72d34382 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ -- cgit v1.2.3 From 01d460dd70adc858e15307332832183c622bee50 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 12 May 2015 09:37:00 -0600 Subject: net: Remove remaining remnants of pm_qos from netdevice.h Commit e2c6544829f removed pm_qos from struct net_device but left the comment and header file. Remove those. Signed-off-by: David Ahern Cc: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1899c74a7127..05b9a694e213 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -25,7 +25,6 @@ #ifndef _LINUX_NETDEVICE_H #define _LINUX_NETDEVICE_H -#include #include #include #include @@ -1499,8 +1498,6 @@ enum netdev_priv_flags { * * @qdisc_tx_busylock: XXX: need comments on this one * - * @pm_qos_req: Power Management QoS object - * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ -- cgit v1.2.3 From f7bcb70ebae0dcdb5a2d859b09e4465784d99029 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 May 2015 13:47:23 -0700 Subject: ktime: Fix ktime_divns to do signed division It was noted that the 32bit implementation of ktime_divns() was doing unsigned division and didn't properly handle negative values. And when a ktime helper was changed to utilize ktime_divns, it caused a regression on some IR blasters. See the following bugzilla for details: https://bugzilla.redhat.com/show_bug.cgi?id=1200353 This patch fixes the problem in ktime_divns by checking and preserving the sign bit, and then reapplying it if appropriate after the division, it also changes the return type to a s64 to make it more obvious this is expected. Nicolas also pointed out that negative dividers would cause infinite loops on 32bit systems, negative dividers is unlikely for users of this function, but out of caution this patch adds checks for negative dividers for both 32-bit (BUG_ON) and 64-bit(WARN_ON) versions to make sure no such use cases creep in. [ tglx: Hand an u64 to do_div() to avoid the compiler warning ] Fixes: 166afb64511e 'ktime: Sanitize ktime_to_us/ms conversion' Reported-and-tested-by: Trevor Cordes Signed-off-by: John Stultz Acked-by: Nicolas Pitre Cc: Ingo Molnar Cc: Josh Boyer Cc: One Thousand Gnomes Cc: Link: http://lkml.kernel.org/r/1431118043-23452-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/ktime.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5fc3d1083071..2b6a204bd8d4 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -166,19 +166,34 @@ static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) } #if BITS_PER_LONG < 64 -extern u64 __ktime_divns(const ktime_t kt, s64 div); -static inline u64 ktime_divns(const ktime_t kt, s64 div) +extern s64 __ktime_divns(const ktime_t kt, s64 div); +static inline s64 ktime_divns(const ktime_t kt, s64 div) { + /* + * Negative divisors could cause an inf loop, + * so bug out here. + */ + BUG_ON(div < 0); if (__builtin_constant_p(div) && !(div >> 32)) { - u64 ns = kt.tv64; - do_div(ns, div); - return ns; + s64 ns = kt.tv64; + u64 tmp = ns < 0 ? -ns : ns; + + do_div(tmp, div); + return ns < 0 ? -tmp : tmp; } else { return __ktime_divns(kt, div); } } #else /* BITS_PER_LONG < 64 */ -# define ktime_divns(kt, div) (u64)((kt).tv64 / (div)) +static inline s64 ktime_divns(const ktime_t kt, s64 div) +{ + /* + * 32-bit implementation cannot handle negative divisors, + * so catch them on 64bit as well. + */ + WARN_ON(div < 0); + return kt.tv64 / div; +} #endif static inline s64 ktime_to_us(const ktime_t kt) -- cgit v1.2.3 From 9abdffe286c1532a54d5aee31571d3029be4026c Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Tue, 5 May 2015 14:56:15 +0530 Subject: dma-buf: add ref counting for module as exporter Add reference counting on a kernel module that exports dma-buf and implements its operations. This prevents the module from being unloaded while DMABUF file is in use. The original patch [1] was submitted by Tomasz Stanislawski, but this is a simpler way to do it. v3: call module_put() as late as possible, per gregkh's comment. v2: move owner to struct dma_buf, and use DEFINE_DMA_BUF_EXPORT_INFO macro to simplify the change. Acked-by: Greg Kroah-Hartman Signed-off-by: Sumit Semwal [1]: https://lkml.org/lkml/2012/8/8/163 --- include/linux/dma-buf.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 2f0b431b73e0..f98bd7068d55 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -115,6 +115,8 @@ struct dma_buf_ops { * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. * @exp_name: name of the exporter; useful for debugging. + * @owner: pointer to exporter module; used for refcounting when exporter is a + * kernel module. * @list_node: node for dma_buf accounting and debugging. * @priv: exporter specific private data for this buffer object. * @resv: reservation object linked to this dma-buf @@ -129,6 +131,7 @@ struct dma_buf { unsigned vmapping_counter; void *vmap_ptr; const char *exp_name; + struct module *owner; struct list_head list_node; void *priv; struct reservation_object *resv; @@ -164,7 +167,8 @@ struct dma_buf_attachment { /** * struct dma_buf_export_info - holds information needed to export a dma_buf - * @exp_name: name of the exporting module - useful for debugging. + * @exp_name: name of the exporter - useful for debugging. + * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer * @size: Size of the buffer * @flags: mode flags for the file @@ -176,6 +180,7 @@ struct dma_buf_attachment { */ struct dma_buf_export_info { const char *exp_name; + struct module *owner; const struct dma_buf_ops *ops; size_t size; int flags; @@ -187,7 +192,8 @@ struct dma_buf_export_info { * helper macro for exporters; zeros and fills in most common values */ #define DEFINE_DMA_BUF_EXPORT_INFO(a) \ - struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME } + struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME, \ + .owner = THIS_MODULE } /** * get_dma_buf - convenience wrapper for get_file. -- cgit v1.2.3 From 25e4fe92a20bbffde87500615250f1d54bfb832f Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 12 May 2015 20:12:23 +0300 Subject: gpiolib: cleanup chained handler and data Clean up chained handler and handler data if they were set by gpiochip_set_chained_irqchip(). Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 2c1e639f66bd..96a678842cde 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -121,6 +121,7 @@ struct gpio_chip { unsigned int irq_base; irq_flow_handler_t irq_handler; unsigned int irq_default_type; + int irq_parent; #endif #if defined(CONFIG_OF_GPIO) -- cgit v1.2.3 From 42c86547f4e5c2e81616c76ce9a2badce515c41f Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 11 Mar 2015 11:34:25 +0100 Subject: clk: Expose clk_hw_reparent() to providers To be used by clock implementations for switching to a new parent during rate change. Signed-off-by: Tomeu Vizoso Signed-off-by: Thierry Reding --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index df695313f975..51efb9ec3e37 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -589,6 +589,7 @@ long __clk_mux_determine_rate_closest(struct clk_hw *hw, unsigned long rate, unsigned long max_rate, unsigned long *best_parent_rate, struct clk_hw **best_parent_p); +void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent); static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw *src) { -- cgit v1.2.3 From f05be589ff32e87821b86845625ed3d402d37dc7 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Fri, 10 Apr 2015 12:09:01 +0800 Subject: mfd: axp20x: Add AXP22x PMIC support Add support for the AXP22x PMIC devices to the existing AXP20x driver. This includes the AXP221 and AXP223, which are identical except for the external data bus. Only AXP221 is added for now. AXP223 will be added after it's Reduced Serial Bus (RSB) interface is supported. AXP22x defines a new set of registers, power supplies and regulators, but most of the API is similar to the AXP20x ones. A new irq chip definition is used, even though the available interrupts on AXP22x is a subset of those on AXP20x. This is done so the interrupt numbers match those on the datasheet. This patch only enables the interrupts, system power-off function, and PEK sub-device. The regulator driver must first support different variants before we enable it from the mfd driver. Signed-off-by: Boris BREZILLON [wens@csie.org: fix interrupts and move regulators to separate patch] Signed-off-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index dfabd6db7ddf..95568eb798c3 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -14,6 +14,7 @@ enum { AXP202_ID = 0, AXP209_ID, + AXP221_ID, AXP288_ID, NR_AXP20X_VARIANTS, }; @@ -45,6 +46,28 @@ enum { #define AXP20X_V_LTF_DISCHRG 0x3c #define AXP20X_V_HTF_DISCHRG 0x3d +#define AXP22X_PWR_OUT_CTRL1 0x10 +#define AXP22X_PWR_OUT_CTRL2 0x12 +#define AXP22X_PWR_OUT_CTRL3 0x13 +#define AXP22X_DLDO1_V_OUT 0x15 +#define AXP22X_DLDO2_V_OUT 0x16 +#define AXP22X_DLDO3_V_OUT 0x17 +#define AXP22X_DLDO4_V_OUT 0x18 +#define AXP22X_ELDO1_V_OUT 0x19 +#define AXP22X_ELDO2_V_OUT 0x1a +#define AXP22X_ELDO3_V_OUT 0x1b +#define AXP22X_DC5LDO_V_OUT 0x1c +#define AXP22X_DCDC1_V_OUT 0x21 +#define AXP22X_DCDC2_V_OUT 0x22 +#define AXP22X_DCDC3_V_OUT 0x23 +#define AXP22X_DCDC4_V_OUT 0x24 +#define AXP22X_DCDC5_V_OUT 0x25 +#define AXP22X_DCDC23_V_RAMP_CTRL 0x27 +#define AXP22X_ALDO1_V_OUT 0x28 +#define AXP22X_ALDO2_V_OUT 0x29 +#define AXP22X_ALDO3_V_OUT 0x2a +#define AXP22X_CHRG_CTRL3 0x35 + /* Interrupt */ #define AXP20X_IRQ1_EN 0x40 #define AXP20X_IRQ2_EN 0x41 @@ -100,6 +123,9 @@ enum { #define AXP20X_VBUS_MON 0x8b #define AXP20X_OVER_TMP 0x8f +#define AXP22X_PWREN_CTRL1 0x8c +#define AXP22X_PWREN_CTRL2 0x8d + /* GPIO */ #define AXP20X_GPIO0_CTRL 0x90 #define AXP20X_LDO5_V_OUT 0x91 @@ -108,6 +134,11 @@ enum { #define AXP20X_GPIO20_SS 0x94 #define AXP20X_GPIO3_CTRL 0x95 +#define AXP22X_LDO_IO0_V_OUT 0x91 +#define AXP22X_LDO_IO1_V_OUT 0x93 +#define AXP22X_GPIO_STATE 0x94 +#define AXP22X_GPIO_PULL_DOWN 0x95 + /* Battery */ #define AXP20X_CHRG_CC_31_24 0xb0 #define AXP20X_CHRG_CC_23_16 0xb1 @@ -120,6 +151,9 @@ enum { #define AXP20X_CC_CTRL 0xb8 #define AXP20X_FG_RES 0xb9 +/* AXP22X specific registers */ +#define AXP22X_BATLOW_THRES1 0xe6 + /* AXP288 specific registers */ #define AXP288_PMIC_ADC_H 0x56 #define AXP288_PMIC_ADC_L 0x57 @@ -158,6 +192,30 @@ enum { AXP20X_REG_ID_MAX, }; +enum { + AXP22X_DCDC1 = 0, + AXP22X_DCDC2, + AXP22X_DCDC3, + AXP22X_DCDC4, + AXP22X_DCDC5, + AXP22X_DC1SW, + AXP22X_DC5LDO, + AXP22X_ALDO1, + AXP22X_ALDO2, + AXP22X_ALDO3, + AXP22X_ELDO1, + AXP22X_ELDO2, + AXP22X_ELDO3, + AXP22X_DLDO1, + AXP22X_DLDO2, + AXP22X_DLDO3, + AXP22X_DLDO4, + AXP22X_RTC_LDO, + AXP22X_LDO_IO0, + AXP22X_LDO_IO1, + AXP22X_REG_ID_MAX, +}; + /* IRQs */ enum { AXP20X_IRQ_ACIN_OVER_V = 1, @@ -199,6 +257,34 @@ enum { AXP20X_IRQ_GPIO0_INPUT, }; +enum axp22x_irqs { + AXP22X_IRQ_ACIN_OVER_V = 1, + AXP22X_IRQ_ACIN_PLUGIN, + AXP22X_IRQ_ACIN_REMOVAL, + AXP22X_IRQ_VBUS_OVER_V, + AXP22X_IRQ_VBUS_PLUGIN, + AXP22X_IRQ_VBUS_REMOVAL, + AXP22X_IRQ_VBUS_V_LOW, + AXP22X_IRQ_BATT_PLUGIN, + AXP22X_IRQ_BATT_REMOVAL, + AXP22X_IRQ_BATT_ENT_ACT_MODE, + AXP22X_IRQ_BATT_EXIT_ACT_MODE, + AXP22X_IRQ_CHARG, + AXP22X_IRQ_CHARG_DONE, + AXP22X_IRQ_BATT_TEMP_HIGH, + AXP22X_IRQ_BATT_TEMP_LOW, + AXP22X_IRQ_DIE_TEMP_HIGH, + AXP22X_IRQ_PEK_SHORT, + AXP22X_IRQ_PEK_LONG, + AXP22X_IRQ_LOW_PWR_LVL1, + AXP22X_IRQ_LOW_PWR_LVL2, + AXP22X_IRQ_TIMER, + AXP22X_IRQ_PEK_RIS_EDGE, + AXP22X_IRQ_PEK_FAL_EDGE, + AXP22X_IRQ_GPIO1_INPUT, + AXP22X_IRQ_GPIO0_INPUT, +}; + enum axp288_irqs { AXP288_IRQ_VBUS_FALL = 2, AXP288_IRQ_VBUS_RISE, -- cgit v1.2.3 From 275e2bc0f25d5eb99c99ebb7293fc3722533124b Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Sat, 2 May 2015 19:28:17 +0200 Subject: netfilter: ipset: Fix ext_*() macros So pointers returned by these macros could be referenced with -> directly. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34b172301558..f88be7258e5f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -122,13 +122,13 @@ struct ip_set_skbinfo { struct ip_set; #define ext_timeout(e, s) \ -(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])) #define ext_counter(e, s) \ -(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +((struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])) #define ext_comment(e, s) \ -(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) +((struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])) #define ext_skbinfo(e, s) \ -(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]) +((struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])) typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, -- cgit v1.2.3 From 289fcff4bcdb1dcc0ce8788b7ea0f58a9e4a495f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 13 May 2015 15:26:42 +0300 Subject: usb: add bus type for USB ULPI UTMI+ Low Pin Interface (ULPI) is a commonly used PHY interface for USB 2.0. The ULPI specification describes a standard set of registers which the vendors can extend for their specific needs. ULPI PHYs provide often functions such as charger detection and ADP sensing and probing. There are two major issues that the bus type is meant to tackle: Firstly, ULPI registers are accessed from the controller. The bus provides convenient method for the controller drivers to share that access with the actual PHY drivers. Secondly, there are already platforms that assume ULPI PHYs are runtime detected, such as many Intel Baytrail based platforms. They do not provide any kind of hardware description for the ULPI PHYs like separate ACPI device object that could be used to enumerate a device from. Signed-off-by: Heikki Krogerus Acked-by: David Cohen Signed-off-by: Felipe Balbi --- include/linux/mod_devicetable.h | 6 ++ include/linux/ulpi/driver.h | 60 ++++++++++++++++++ include/linux/ulpi/interface.h | 23 +++++++ include/linux/ulpi/regs.h | 130 ++++++++++++++++++++++++++++++++++++++ include/linux/usb/ulpi.h | 134 +--------------------------------------- 5 files changed, 221 insertions(+), 132 deletions(-) create mode 100644 include/linux/ulpi/driver.h create mode 100644 include/linux/ulpi/interface.h create mode 100644 include/linux/ulpi/regs.h (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3bfd56778c29..7ab00d61d30a 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -629,4 +629,10 @@ struct mcb_device_id { kernel_ulong_t driver_data; }; +struct ulpi_device_id { + __u16 vendor; + __u16 product; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h new file mode 100644 index 000000000000..388f6e08b9d4 --- /dev/null +++ b/include/linux/ulpi/driver.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_ULPI_DRIVER_H +#define __LINUX_ULPI_DRIVER_H + +#include + +#include + +struct ulpi_ops; + +/** + * struct ulpi - describes ULPI PHY device + * @id: vendor and product ids for ULPI device + * @ops: I/O access + * @dev: device interface + */ +struct ulpi { + struct ulpi_device_id id; + struct ulpi_ops *ops; + struct device dev; +}; + +#define to_ulpi_dev(d) container_of(d, struct ulpi, dev) + +static inline void ulpi_set_drvdata(struct ulpi *ulpi, void *data) +{ + dev_set_drvdata(&ulpi->dev, data); +} + +static inline void *ulpi_get_drvdata(struct ulpi *ulpi) +{ + return dev_get_drvdata(&ulpi->dev); +} + +/** + * struct ulpi_driver - describes a ULPI PHY driver + * @id_table: array of device identifiers supported by this driver + * @probe: binds this driver to ULPI device + * @remove: unbinds this driver from ULPI device + * @driver: the name and owner members must be initialized by the drivers + */ +struct ulpi_driver { + const struct ulpi_device_id *id_table; + int (*probe)(struct ulpi *ulpi); + void (*remove)(struct ulpi *ulpi); + struct device_driver driver; +}; + +#define to_ulpi_driver(d) container_of(d, struct ulpi_driver, driver) + +int ulpi_register_driver(struct ulpi_driver *drv); +void ulpi_unregister_driver(struct ulpi_driver *drv); + +#define module_ulpi_driver(__ulpi_driver) \ + module_driver(__ulpi_driver, ulpi_register_driver, \ + ulpi_unregister_driver) + +int ulpi_read(struct ulpi *ulpi, u8 addr); +int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val); + +#endif /* __LINUX_ULPI_DRIVER_H */ diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h new file mode 100644 index 000000000000..4de8ab491038 --- /dev/null +++ b/include/linux/ulpi/interface.h @@ -0,0 +1,23 @@ +#ifndef __LINUX_ULPI_INTERFACE_H +#define __LINUX_ULPI_INTERFACE_H + +#include + +struct ulpi; + +/** + * struct ulpi_ops - ULPI register access + * @dev: the interface provider + * @read: read operation for ULPI register access + * @write: write operation for ULPI register access + */ +struct ulpi_ops { + struct device *dev; + int (*read)(struct ulpi_ops *ops, u8 addr); + int (*write)(struct ulpi_ops *ops, u8 addr, u8 val); +}; + +struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *); +void ulpi_unregister_interface(struct ulpi *); + +#endif /* __LINUX_ULPI_INTERFACE_H */ diff --git a/include/linux/ulpi/regs.h b/include/linux/ulpi/regs.h new file mode 100644 index 000000000000..b5b8b8804560 --- /dev/null +++ b/include/linux/ulpi/regs.h @@ -0,0 +1,130 @@ +#ifndef __LINUX_ULPI_REGS_H +#define __LINUX_ULPI_REGS_H + +/* + * Macros for Set and Clear + * See ULPI 1.1 specification to find the registers with Set and Clear offsets + */ +#define ULPI_SET(a) (a + 1) +#define ULPI_CLR(a) (a + 2) + +/* + * Register Map + */ +#define ULPI_VENDOR_ID_LOW 0x00 +#define ULPI_VENDOR_ID_HIGH 0x01 +#define ULPI_PRODUCT_ID_LOW 0x02 +#define ULPI_PRODUCT_ID_HIGH 0x03 +#define ULPI_FUNC_CTRL 0x04 +#define ULPI_IFC_CTRL 0x07 +#define ULPI_OTG_CTRL 0x0a +#define ULPI_USB_INT_EN_RISE 0x0d +#define ULPI_USB_INT_EN_FALL 0x10 +#define ULPI_USB_INT_STS 0x13 +#define ULPI_USB_INT_LATCH 0x14 +#define ULPI_DEBUG 0x15 +#define ULPI_SCRATCH 0x16 +/* Optional Carkit Registers */ +#define ULPI_CARKIT_CTRL 0x19 +#define ULPI_CARKIT_INT_DELAY 0x1c +#define ULPI_CARKIT_INT_EN 0x1d +#define ULPI_CARKIT_INT_STS 0x20 +#define ULPI_CARKIT_INT_LATCH 0x21 +#define ULPI_CARKIT_PLS_CTRL 0x22 +/* Other Optional Registers */ +#define ULPI_TX_POS_WIDTH 0x25 +#define ULPI_TX_NEG_WIDTH 0x26 +#define ULPI_POLARITY_RECOVERY 0x27 +/* Access Extended Register Set */ +#define ULPI_ACCESS_EXTENDED 0x2f +/* Vendor Specific */ +#define ULPI_VENDOR_SPECIFIC 0x30 +/* Extended Registers */ +#define ULPI_EXT_VENDOR_SPECIFIC 0x80 + +/* + * Register Bits + */ + +/* Function Control */ +#define ULPI_FUNC_CTRL_XCVRSEL BIT(0) +#define ULPI_FUNC_CTRL_XCVRSEL_MASK 0x3 +#define ULPI_FUNC_CTRL_HIGH_SPEED 0x0 +#define ULPI_FUNC_CTRL_FULL_SPEED 0x1 +#define ULPI_FUNC_CTRL_LOW_SPEED 0x2 +#define ULPI_FUNC_CTRL_FS4LS 0x3 +#define ULPI_FUNC_CTRL_TERMSELECT BIT(2) +#define ULPI_FUNC_CTRL_OPMODE BIT(3) +#define ULPI_FUNC_CTRL_OPMODE_MASK (0x3 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NORMAL (0x0 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NONDRIVING (0x1 << 3) +#define ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI (0x2 << 3) +#define ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP (0x3 << 3) +#define ULPI_FUNC_CTRL_RESET BIT(5) +#define ULPI_FUNC_CTRL_SUSPENDM BIT(6) + +/* Interface Control */ +#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE BIT(0) +#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE BIT(1) +#define ULPI_IFC_CTRL_CARKITMODE BIT(2) +#define ULPI_IFC_CTRL_CLOCKSUSPENDM BIT(3) +#define ULPI_IFC_CTRL_AUTORESUME BIT(4) +#define ULPI_IFC_CTRL_EXTERNAL_VBUS BIT(5) +#define ULPI_IFC_CTRL_PASSTHRU BIT(6) +#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE BIT(7) + +/* OTG Control */ +#define ULPI_OTG_CTRL_ID_PULLUP BIT(0) +#define ULPI_OTG_CTRL_DP_PULLDOWN BIT(1) +#define ULPI_OTG_CTRL_DM_PULLDOWN BIT(2) +#define ULPI_OTG_CTRL_DISCHRGVBUS BIT(3) +#define ULPI_OTG_CTRL_CHRGVBUS BIT(4) +#define ULPI_OTG_CTRL_DRVVBUS BIT(5) +#define ULPI_OTG_CTRL_DRVVBUS_EXT BIT(6) +#define ULPI_OTG_CTRL_EXTVBUSIND BIT(7) + +/* USB Interrupt Enable Rising, + * USB Interrupt Enable Falling, + * USB Interrupt Status and + * USB Interrupt Latch + */ +#define ULPI_INT_HOST_DISCONNECT BIT(0) +#define ULPI_INT_VBUS_VALID BIT(1) +#define ULPI_INT_SESS_VALID BIT(2) +#define ULPI_INT_SESS_END BIT(3) +#define ULPI_INT_IDGRD BIT(4) + +/* Debug */ +#define ULPI_DEBUG_LINESTATE0 BIT(0) +#define ULPI_DEBUG_LINESTATE1 BIT(1) + +/* Carkit Control */ +#define ULPI_CARKIT_CTRL_CARKITPWR BIT(0) +#define ULPI_CARKIT_CTRL_IDGNDDRV BIT(1) +#define ULPI_CARKIT_CTRL_TXDEN BIT(2) +#define ULPI_CARKIT_CTRL_RXDEN BIT(3) +#define ULPI_CARKIT_CTRL_SPKLEFTEN BIT(4) +#define ULPI_CARKIT_CTRL_SPKRIGHTEN BIT(5) +#define ULPI_CARKIT_CTRL_MICEN BIT(6) + +/* Carkit Interrupt Enable */ +#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE BIT(0) +#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL BIT(1) +#define ULPI_CARKIT_INT_EN_CARINTDET BIT(2) +#define ULPI_CARKIT_INT_EN_DP_RISE BIT(3) +#define ULPI_CARKIT_INT_EN_DP_FALL BIT(4) + +/* Carkit Interrupt Status and + * Carkit Interrupt Latch + */ +#define ULPI_CARKIT_INT_IDFLOAT BIT(0) +#define ULPI_CARKIT_INT_CARINTDET BIT(1) +#define ULPI_CARKIT_INT_DP BIT(2) + +/* Carkit Pulse Control*/ +#define ULPI_CARKIT_PLS_CTRL_TXPLSEN BIT(0) +#define ULPI_CARKIT_PLS_CTRL_RXPLSEN BIT(1) +#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN BIT(2) +#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN BIT(3) + +#endif /* __LINUX_ULPI_REGS_H */ diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5c295c26ad37..5f07407a367a 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -12,6 +12,8 @@ #define __LINUX_USB_ULPI_H #include +#include + /*-------------------------------------------------------------------------*/ /* @@ -49,138 +51,6 @@ /*-------------------------------------------------------------------------*/ -/* - * Macros for Set and Clear - * See ULPI 1.1 specification to find the registers with Set and Clear offsets - */ -#define ULPI_SET(a) (a + 1) -#define ULPI_CLR(a) (a + 2) - -/*-------------------------------------------------------------------------*/ - -/* - * Register Map - */ -#define ULPI_VENDOR_ID_LOW 0x00 -#define ULPI_VENDOR_ID_HIGH 0x01 -#define ULPI_PRODUCT_ID_LOW 0x02 -#define ULPI_PRODUCT_ID_HIGH 0x03 -#define ULPI_FUNC_CTRL 0x04 -#define ULPI_IFC_CTRL 0x07 -#define ULPI_OTG_CTRL 0x0a -#define ULPI_USB_INT_EN_RISE 0x0d -#define ULPI_USB_INT_EN_FALL 0x10 -#define ULPI_USB_INT_STS 0x13 -#define ULPI_USB_INT_LATCH 0x14 -#define ULPI_DEBUG 0x15 -#define ULPI_SCRATCH 0x16 -/* Optional Carkit Registers */ -#define ULPI_CARCIT_CTRL 0x19 -#define ULPI_CARCIT_INT_DELAY 0x1c -#define ULPI_CARCIT_INT_EN 0x1d -#define ULPI_CARCIT_INT_STS 0x20 -#define ULPI_CARCIT_INT_LATCH 0x21 -#define ULPI_CARCIT_PLS_CTRL 0x22 -/* Other Optional Registers */ -#define ULPI_TX_POS_WIDTH 0x25 -#define ULPI_TX_NEG_WIDTH 0x26 -#define ULPI_POLARITY_RECOVERY 0x27 -/* Access Extended Register Set */ -#define ULPI_ACCESS_EXTENDED 0x2f -/* Vendor Specific */ -#define ULPI_VENDOR_SPECIFIC 0x30 -/* Extended Registers */ -#define ULPI_EXT_VENDOR_SPECIFIC 0x80 - -/*-------------------------------------------------------------------------*/ - -/* - * Register Bits - */ - -/* Function Control */ -#define ULPI_FUNC_CTRL_XCVRSEL (1 << 0) -#define ULPI_FUNC_CTRL_XCVRSEL_MASK (3 << 0) -#define ULPI_FUNC_CTRL_HIGH_SPEED (0 << 0) -#define ULPI_FUNC_CTRL_FULL_SPEED (1 << 0) -#define ULPI_FUNC_CTRL_LOW_SPEED (2 << 0) -#define ULPI_FUNC_CTRL_FS4LS (3 << 0) -#define ULPI_FUNC_CTRL_TERMSELECT (1 << 2) -#define ULPI_FUNC_CTRL_OPMODE (1 << 3) -#define ULPI_FUNC_CTRL_OPMODE_MASK (3 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NORMAL (0 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NONDRIVING (1 << 3) -#define ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI (2 << 3) -#define ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP (3 << 3) -#define ULPI_FUNC_CTRL_RESET (1 << 5) -#define ULPI_FUNC_CTRL_SUSPENDM (1 << 6) - -/* Interface Control */ -#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE (1 << 0) -#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE (1 << 1) -#define ULPI_IFC_CTRL_CARKITMODE (1 << 2) -#define ULPI_IFC_CTRL_CLOCKSUSPENDM (1 << 3) -#define ULPI_IFC_CTRL_AUTORESUME (1 << 4) -#define ULPI_IFC_CTRL_EXTERNAL_VBUS (1 << 5) -#define ULPI_IFC_CTRL_PASSTHRU (1 << 6) -#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE (1 << 7) - -/* OTG Control */ -#define ULPI_OTG_CTRL_ID_PULLUP (1 << 0) -#define ULPI_OTG_CTRL_DP_PULLDOWN (1 << 1) -#define ULPI_OTG_CTRL_DM_PULLDOWN (1 << 2) -#define ULPI_OTG_CTRL_DISCHRGVBUS (1 << 3) -#define ULPI_OTG_CTRL_CHRGVBUS (1 << 4) -#define ULPI_OTG_CTRL_DRVVBUS (1 << 5) -#define ULPI_OTG_CTRL_DRVVBUS_EXT (1 << 6) -#define ULPI_OTG_CTRL_EXTVBUSIND (1 << 7) - -/* USB Interrupt Enable Rising, - * USB Interrupt Enable Falling, - * USB Interrupt Status and - * USB Interrupt Latch - */ -#define ULPI_INT_HOST_DISCONNECT (1 << 0) -#define ULPI_INT_VBUS_VALID (1 << 1) -#define ULPI_INT_SESS_VALID (1 << 2) -#define ULPI_INT_SESS_END (1 << 3) -#define ULPI_INT_IDGRD (1 << 4) - -/* Debug */ -#define ULPI_DEBUG_LINESTATE0 (1 << 0) -#define ULPI_DEBUG_LINESTATE1 (1 << 1) - -/* Carkit Control */ -#define ULPI_CARKIT_CTRL_CARKITPWR (1 << 0) -#define ULPI_CARKIT_CTRL_IDGNDDRV (1 << 1) -#define ULPI_CARKIT_CTRL_TXDEN (1 << 2) -#define ULPI_CARKIT_CTRL_RXDEN (1 << 3) -#define ULPI_CARKIT_CTRL_SPKLEFTEN (1 << 4) -#define ULPI_CARKIT_CTRL_SPKRIGHTEN (1 << 5) -#define ULPI_CARKIT_CTRL_MICEN (1 << 6) - -/* Carkit Interrupt Enable */ -#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE (1 << 0) -#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL (1 << 1) -#define ULPI_CARKIT_INT_EN_CARINTDET (1 << 2) -#define ULPI_CARKIT_INT_EN_DP_RISE (1 << 3) -#define ULPI_CARKIT_INT_EN_DP_FALL (1 << 4) - -/* Carkit Interrupt Status and - * Carkit Interrupt Latch - */ -#define ULPI_CARKIT_INT_IDFLOAT (1 << 0) -#define ULPI_CARKIT_INT_CARINTDET (1 << 1) -#define ULPI_CARKIT_INT_DP (1 << 2) - -/* Carkit Pulse Control*/ -#define ULPI_CARKIT_PLS_CTRL_TXPLSEN (1 << 0) -#define ULPI_CARKIT_PLS_CTRL_RXPLSEN (1 << 1) -#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN (1 << 2) -#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN (1 << 3) - -/*-------------------------------------------------------------------------*/ - #if IS_ENABLED(CONFIG_USB_ULPI) struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, unsigned int flags); -- cgit v1.2.3 From f267caab44451baa70d25fa3191a68bb79ad1b08 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 14:03:51 -0400 Subject: tracing: Remove unused prototype ftrace_event_define_field() ftrace_event_define_field() has a prototype defined but never used. Remove it. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 65ce6de91307..f8465d65f3c7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -219,10 +219,6 @@ struct ftrace_event_class { extern int ftrace_event_reg(struct ftrace_event_call *event, enum trace_reg type, void *data); -int ftrace_event_define_field(struct ftrace_event_call *call, - char *type, int len, char *item, int offset, - int field_size, int sign, int filter); - struct ftrace_event_buffer { struct ring_buffer *buffer; struct ring_buffer_event *event; @@ -238,10 +234,6 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); -int ftrace_event_define_field(struct ftrace_event_call *call, - char *type, int len, char *item, int offset, - int field_size, int sign, int filter); - enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, -- cgit v1.2.3 From af658dca221207174fc0a7bcdcd4cff7c589fdd8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 29 Apr 2015 14:36:05 -0400 Subject: tracing: Rename ftrace_event.h to trace_events.h The term "ftrace" is really the infrastructure of the function hooks, and not the trace events. Rename ftrace_event.h to trace_events.h to represent the trace_event infrastructure and decouple the term ftrace from it. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 616 ------------------------------------------- include/linux/trace_events.h | 616 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 616 insertions(+), 616 deletions(-) delete mode 100644 include/linux/ftrace_event.h create mode 100644 include/linux/trace_events.h (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h deleted file mode 100644 index f8465d65f3c7..000000000000 --- a/include/linux/ftrace_event.h +++ /dev/null @@ -1,616 +0,0 @@ - -#ifndef _LINUX_FTRACE_EVENT_H -#define _LINUX_FTRACE_EVENT_H - -#include -#include -#include -#include -#include -#include - -struct trace_array; -struct trace_buffer; -struct tracer; -struct dentry; -struct bpf_prog; - -struct trace_print_flags { - unsigned long mask; - const char *name; -}; - -struct trace_print_flags_u64 { - unsigned long long mask; - const char *name; -}; - -const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, - unsigned long flags, - const struct trace_print_flags *flag_array); - -const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array); - -#if BITS_PER_LONG == 32 -const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, - unsigned long long val, - const struct trace_print_flags_u64 - *symbol_array); -#endif - -const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, - unsigned int bitmask_size); - -const char *ftrace_print_hex_seq(struct trace_seq *p, - const unsigned char *buf, int len); - -const char *ftrace_print_array_seq(struct trace_seq *p, - const void *buf, int count, - size_t el_size); - -struct trace_iterator; -struct trace_event; - -int ftrace_raw_output_prep(struct trace_iterator *iter, - struct trace_event *event); - -/* - * The trace entry - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_entry { - unsigned short type; - unsigned char flags; - unsigned char preempt_count; - int pid; -}; - -#define FTRACE_MAX_EVENT \ - ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) - -/* - * Trace iterator - used by printout routines who present trace - * results to users and which routines might sleep, etc: - */ -struct trace_iterator { - struct trace_array *tr; - struct tracer *trace; - struct trace_buffer *trace_buffer; - void *private; - int cpu_file; - struct mutex mutex; - struct ring_buffer_iter **buffer_iter; - unsigned long iter_flags; - - /* trace_seq for __print_flags() and __print_symbolic() etc. */ - struct trace_seq tmp_seq; - - cpumask_var_t started; - - /* it's true when current open file is snapshot */ - bool snapshot; - - /* The below is zeroed out in pipe_read */ - struct trace_seq seq; - struct trace_entry *ent; - unsigned long lost_events; - int leftover; - int ent_size; - int cpu; - u64 ts; - - loff_t pos; - long idx; - - /* All new field here will be zeroed out in pipe_read */ -}; - -enum trace_iter_flags { - TRACE_FILE_LAT_FMT = 1, - TRACE_FILE_ANNOTATE = 2, - TRACE_FILE_TIME_IN_NS = 4, -}; - - -typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, - int flags, struct trace_event *event); - -struct trace_event_functions { - trace_print_func trace; - trace_print_func raw; - trace_print_func hex; - trace_print_func binary; -}; - -struct trace_event { - struct hlist_node node; - struct list_head list; - int type; - struct trace_event_functions *funcs; -}; - -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); - -/* Return values for print_line callback */ -enum print_line_t { - TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ - TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ - TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ -}; - -/* - * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq - * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function - * simplifies those functions and keeps them in sync. - */ -static inline enum print_line_t trace_handle_return(struct trace_seq *s) -{ - return trace_seq_has_overflowed(s) ? - TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; -} - -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); -struct ftrace_event_file; - -struct ring_buffer_event * -trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, - struct ftrace_event_file *ftrace_file, - int type, unsigned long len, - unsigned long flags, int pc); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, - int type, unsigned long len, - unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs); -void trace_current_buffer_discard_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event); - -void tracing_record_cmdline(struct task_struct *tsk); - -int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); - -struct event_filter; - -enum trace_reg { - TRACE_REG_REGISTER, - TRACE_REG_UNREGISTER, -#ifdef CONFIG_PERF_EVENTS - TRACE_REG_PERF_REGISTER, - TRACE_REG_PERF_UNREGISTER, - TRACE_REG_PERF_OPEN, - TRACE_REG_PERF_CLOSE, - TRACE_REG_PERF_ADD, - TRACE_REG_PERF_DEL, -#endif -}; - -struct ftrace_event_call; - -struct ftrace_event_class { - const char *system; - void *probe; -#ifdef CONFIG_PERF_EVENTS - void *perf_probe; -#endif - int (*reg)(struct ftrace_event_call *event, - enum trace_reg type, void *data); - int (*define_fields)(struct ftrace_event_call *); - struct list_head *(*get_fields)(struct ftrace_event_call *); - struct list_head fields; - int (*raw_init)(struct ftrace_event_call *); -}; - -extern int ftrace_event_reg(struct ftrace_event_call *event, - enum trace_reg type, void *data); - -struct ftrace_event_buffer { - struct ring_buffer *buffer; - struct ring_buffer_event *event; - struct ftrace_event_file *ftrace_file; - void *entry; - unsigned long flags; - int pc; -}; - -void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, - struct ftrace_event_file *ftrace_file, - unsigned long len); - -void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); - -enum { - TRACE_EVENT_FL_FILTERED_BIT, - TRACE_EVENT_FL_CAP_ANY_BIT, - TRACE_EVENT_FL_NO_SET_FILTER_BIT, - TRACE_EVENT_FL_IGNORE_ENABLE_BIT, - TRACE_EVENT_FL_WAS_ENABLED_BIT, - TRACE_EVENT_FL_USE_CALL_FILTER_BIT, - TRACE_EVENT_FL_TRACEPOINT_BIT, - TRACE_EVENT_FL_KPROBE_BIT, -}; - -/* - * Event flags: - * FILTERED - The event has a filter attached - * CAP_ANY - Any user can enable for perf - * NO_SET_FILTER - Set when filter has error and is to be ignored - * IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file - * WAS_ENABLED - Set and stays set when an event was ever enabled - * (used for module unloading, if a module event is enabled, - * it is best to clear the buffers that used it). - * USE_CALL_FILTER - For ftrace internal events, don't use file filter - * TRACEPOINT - Event is a tracepoint - * KPROBE - Event is a kprobe - */ -enum { - TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), - TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), - TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), - TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), - TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), - TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), - TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), - TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), -}; - -struct ftrace_event_call { - struct list_head list; - struct ftrace_event_class *class; - union { - char *name; - /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ - struct tracepoint *tp; - }; - struct trace_event event; - char *print_fmt; - struct event_filter *filter; - void *mod; - void *data; - /* - * bit 0: filter_active - * bit 1: allow trace by non root (cap any) - * bit 2: failed to apply filter - * bit 3: ftrace internal event (do not enable) - * bit 4: Event was enabled by module - * bit 5: use call filter rather than file filter - * bit 6: Event is a tracepoint - */ - int flags; /* static flags of different events */ - -#ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - - int (*perf_perm)(struct ftrace_event_call *, - struct perf_event *); -#endif -}; - -static inline const char * -ftrace_event_name(struct ftrace_event_call *call) -{ - if (call->flags & TRACE_EVENT_FL_TRACEPOINT) - return call->tp ? call->tp->name : NULL; - else - return call->name; -} - -struct trace_array; -struct ftrace_subsystem_dir; - -enum { - FTRACE_EVENT_FL_ENABLED_BIT, - FTRACE_EVENT_FL_RECORDED_CMD_BIT, - FTRACE_EVENT_FL_FILTERED_BIT, - FTRACE_EVENT_FL_NO_SET_FILTER_BIT, - FTRACE_EVENT_FL_SOFT_MODE_BIT, - FTRACE_EVENT_FL_SOFT_DISABLED_BIT, - FTRACE_EVENT_FL_TRIGGER_MODE_BIT, - FTRACE_EVENT_FL_TRIGGER_COND_BIT, -}; - -/* - * Ftrace event file flags: - * ENABLED - The event is enabled - * RECORDED_CMD - The comms should be recorded at sched_switch - * FILTERED - The event has a filter attached - * NO_SET_FILTER - Set when filter has error and is to be ignored - * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED - * SOFT_DISABLED - When set, do not trace the event (even though its - * tracepoint may be enabled) - * TRIGGER_MODE - When set, invoke the triggers associated with the event - * TRIGGER_COND - When set, one or more triggers has an associated filter - */ -enum { - FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), - FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), - FTRACE_EVENT_FL_FILTERED = (1 << FTRACE_EVENT_FL_FILTERED_BIT), - FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT), - FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), - FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), - FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT), - FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), -}; - -struct ftrace_event_file { - struct list_head list; - struct ftrace_event_call *event_call; - struct event_filter *filter; - struct dentry *dir; - struct trace_array *tr; - struct ftrace_subsystem_dir *system; - struct list_head triggers; - - /* - * 32 bit flags: - * bit 0: enabled - * bit 1: enabled cmd record - * bit 2: enable/disable with the soft disable bit - * bit 3: soft disabled - * bit 4: trigger enabled - * - * Note: The bits must be set atomically to prevent races - * from other writers. Reads of flags do not need to be in - * sync as they occur in critical sections. But the way flags - * is currently used, these changes do not affect the code - * except that when a change is made, it may have a slight - * delay in propagating the changes to other CPUs due to - * caching and such. Which is mostly OK ;-) - */ - unsigned long flags; - atomic_t sm_ref; /* soft-mode reference counter */ - atomic_t tm_ref; /* trigger-mode reference counter */ -}; - -#define __TRACE_EVENT_FLAGS(name, value) \ - static int __init trace_init_flags_##name(void) \ - { \ - event_##name.flags |= value; \ - return 0; \ - } \ - early_initcall(trace_init_flags_##name); - -#define __TRACE_EVENT_PERF_PERM(name, expr...) \ - static int perf_perm_##name(struct ftrace_event_call *tp_event, \ - struct perf_event *p_event) \ - { \ - return ({ expr; }); \ - } \ - static int __init trace_init_perf_perm_##name(void) \ - { \ - event_##name.perf_perm = &perf_perm_##name; \ - return 0; \ - } \ - early_initcall(trace_init_perf_perm_##name); - -#define PERF_MAX_TRACE_SIZE 2048 - -#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ - -enum event_trigger_type { - ETT_NONE = (0), - ETT_TRACE_ONOFF = (1 << 0), - ETT_SNAPSHOT = (1 << 1), - ETT_STACKTRACE = (1 << 2), - ETT_EVENT_ENABLE = (1 << 3), -}; - -extern int filter_match_preds(struct event_filter *filter, void *rec); - -extern int filter_check_discard(struct ftrace_event_file *file, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); -extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event); -extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file, - void *rec); -extern void event_triggers_post_call(struct ftrace_event_file *file, - enum event_trigger_type tt); - -/** - * ftrace_trigger_soft_disabled - do triggers and test if soft disabled - * @file: The file pointer of the event to test - * - * If any triggers without filters are attached to this event, they - * will be called here. If the event is soft disabled and has no - * triggers that require testing the fields, it will return true, - * otherwise false. - */ -static inline bool -ftrace_trigger_soft_disabled(struct ftrace_event_file *file) -{ - unsigned long eflags = file->flags; - - if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { - if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) - event_triggers_call(file, NULL); - if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) - return true; - } - return false; -} - -/* - * Helper function for event_trigger_unlock_commit{_regs}(). - * If there are event triggers attached to this event that requires - * filtering against its fields, then they wil be called as the - * entry already holds the field information of the current event. - * - * It also checks if the event should be discarded or not. - * It is to be discarded if the event is soft disabled and the - * event was only recorded to process triggers, or if the event - * filter is active and this event did not match the filters. - * - * Returns true if the event is discarded, false otherwise. - */ -static inline bool -__event_trigger_test_discard(struct ftrace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, - enum event_trigger_type *tt) -{ - unsigned long eflags = file->flags; - - if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) - *tt = event_triggers_call(file, entry); - - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags)) - ring_buffer_discard_commit(buffer, event); - else if (!filter_check_discard(file, entry, buffer, event)) - return false; - - return true; -} - -/** - * event_trigger_unlock_commit - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - */ -static inline void -event_trigger_unlock_commit(struct ftrace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(buffer, event, irq_flags, pc); - - if (tt) - event_triggers_post_call(file, tt); -} - -/** - * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer assoctiated to the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @irq_flags: The state of the interrupts at the start of the event - * @pc: The state of the preempt count at the start of the event. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - * - * Same as event_trigger_unlock_commit() but calls - * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). - */ -static inline void -event_trigger_unlock_commit_regs(struct ftrace_event_file *file, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned long irq_flags, int pc, - struct pt_regs *regs) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); - - if (tt) - event_triggers_post_call(file, tt); -} - -#ifdef CONFIG_BPF_SYSCALL -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); -#else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) -{ - return 1; -} -#endif - -enum { - FILTER_OTHER = 0, - FILTER_STATIC_STRING, - FILTER_DYN_STRING, - FILTER_PTR_STRING, - FILTER_TRACE_FN, -}; - -extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, - int is_signed, int filter_type); -extern int trace_add_event_call(struct ftrace_event_call *call); -extern int trace_remove_event_call(struct ftrace_event_call *call); - -#define is_signed_type(type) (((type)(-1)) < (type)1) - -int trace_set_clr_event(const char *system, const char *event, int set); - -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) - -#ifdef CONFIG_PERF_EVENTS -struct perf_event; - -DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); - -extern int perf_trace_init(struct perf_event *event); -extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_add(struct perf_event *event, int flags); -extern void perf_trace_del(struct perf_event *event, int flags); -extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, - char *filter_str); -extern void ftrace_profile_free_filter(struct perf_event *event); -extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp); - -static inline void -perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, struct pt_regs *regs, void *head, - struct task_struct *task) -{ - perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); -} -#endif - -#endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h new file mode 100644 index 000000000000..f8465d65f3c7 --- /dev/null +++ b/include/linux/trace_events.h @@ -0,0 +1,616 @@ + +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include +#include +#include +#include +#include +#include + +struct trace_array; +struct trace_buffer; +struct tracer; +struct dentry; +struct bpf_prog; + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + +const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); + +const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); + +#if BITS_PER_LONG == 32 +const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 + *symbol_array); +#endif + +const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size); + +const char *ftrace_print_hex_seq(struct trace_seq *p, + const unsigned char *buf, int len); + +const char *ftrace_print_array_seq(struct trace_seq *p, + const void *buf, int count, + size_t el_size); + +struct trace_iterator; +struct trace_event; + +int ftrace_raw_output_prep(struct trace_iterator *iter, + struct trace_event *event); + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned short type; + unsigned char flags; + unsigned char preempt_count; + int pid; +}; + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + struct trace_buffer *trace_buffer; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter **buffer_iter; + unsigned long iter_flags; + + /* trace_seq for __print_flags() and __print_symbolic() etc. */ + struct trace_seq tmp_seq; + + cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + unsigned long lost_events; + int leftover; + int ent_size; + int cpu; + u64 ts; + + loff_t pos; + long idx; + + /* All new field here will be zeroed out in pipe_read */ +}; + +enum trace_iter_flags { + TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, + TRACE_FILE_TIME_IN_NS = 4, +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags, struct trace_event *event); + +struct trace_event_functions { + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +struct trace_event { + struct hlist_node node; + struct list_head list; + int type; + struct trace_event_functions *funcs; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + +/* + * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq + * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function + * simplifies those functions and keeps them in sync. + */ +static inline enum print_line_t trace_handle_return(struct trace_seq *s) +{ + return trace_seq_has_overflowed(s) ? + TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; +} + +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); +struct ftrace_event_file; + +struct ring_buffer_event * +trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, + struct ftrace_event_file *ftrace_file, + int type, unsigned long len, + unsigned long flags, int pc); +struct ring_buffer_event * +trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, + int type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + struct pt_regs *regs); +void trace_current_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); + +struct event_filter; + +enum trace_reg { + TRACE_REG_REGISTER, + TRACE_REG_UNREGISTER, +#ifdef CONFIG_PERF_EVENTS + TRACE_REG_PERF_REGISTER, + TRACE_REG_PERF_UNREGISTER, + TRACE_REG_PERF_OPEN, + TRACE_REG_PERF_CLOSE, + TRACE_REG_PERF_ADD, + TRACE_REG_PERF_DEL, +#endif +}; + +struct ftrace_event_call; + +struct ftrace_event_class { + const char *system; + void *probe; +#ifdef CONFIG_PERF_EVENTS + void *perf_probe; +#endif + int (*reg)(struct ftrace_event_call *event, + enum trace_reg type, void *data); + int (*define_fields)(struct ftrace_event_call *); + struct list_head *(*get_fields)(struct ftrace_event_call *); + struct list_head fields; + int (*raw_init)(struct ftrace_event_call *); +}; + +extern int ftrace_event_reg(struct ftrace_event_call *event, + enum trace_reg type, void *data); + +struct ftrace_event_buffer { + struct ring_buffer *buffer; + struct ring_buffer_event *event; + struct ftrace_event_file *ftrace_file; + void *entry; + unsigned long flags; + int pc; +}; + +void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, + struct ftrace_event_file *ftrace_file, + unsigned long len); + +void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); + +enum { + TRACE_EVENT_FL_FILTERED_BIT, + TRACE_EVENT_FL_CAP_ANY_BIT, + TRACE_EVENT_FL_NO_SET_FILTER_BIT, + TRACE_EVENT_FL_IGNORE_ENABLE_BIT, + TRACE_EVENT_FL_WAS_ENABLED_BIT, + TRACE_EVENT_FL_USE_CALL_FILTER_BIT, + TRACE_EVENT_FL_TRACEPOINT_BIT, + TRACE_EVENT_FL_KPROBE_BIT, +}; + +/* + * Event flags: + * FILTERED - The event has a filter attached + * CAP_ANY - Any user can enable for perf + * NO_SET_FILTER - Set when filter has error and is to be ignored + * IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file + * WAS_ENABLED - Set and stays set when an event was ever enabled + * (used for module unloading, if a module event is enabled, + * it is best to clear the buffers that used it). + * USE_CALL_FILTER - For ftrace internal events, don't use file filter + * TRACEPOINT - Event is a tracepoint + * KPROBE - Event is a kprobe + */ +enum { + TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), + TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), + TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), + TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), + TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), + TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), + TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), + TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), +}; + +struct ftrace_event_call { + struct list_head list; + struct ftrace_event_class *class; + union { + char *name; + /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ + struct tracepoint *tp; + }; + struct trace_event event; + char *print_fmt; + struct event_filter *filter; + void *mod; + void *data; + /* + * bit 0: filter_active + * bit 1: allow trace by non root (cap any) + * bit 2: failed to apply filter + * bit 3: ftrace internal event (do not enable) + * bit 4: Event was enabled by module + * bit 5: use call filter rather than file filter + * bit 6: Event is a tracepoint + */ + int flags; /* static flags of different events */ + +#ifdef CONFIG_PERF_EVENTS + int perf_refcount; + struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; + + int (*perf_perm)(struct ftrace_event_call *, + struct perf_event *); +#endif +}; + +static inline const char * +ftrace_event_name(struct ftrace_event_call *call) +{ + if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + return call->tp ? call->tp->name : NULL; + else + return call->name; +} + +struct trace_array; +struct ftrace_subsystem_dir; + +enum { + FTRACE_EVENT_FL_ENABLED_BIT, + FTRACE_EVENT_FL_RECORDED_CMD_BIT, + FTRACE_EVENT_FL_FILTERED_BIT, + FTRACE_EVENT_FL_NO_SET_FILTER_BIT, + FTRACE_EVENT_FL_SOFT_MODE_BIT, + FTRACE_EVENT_FL_SOFT_DISABLED_BIT, + FTRACE_EVENT_FL_TRIGGER_MODE_BIT, + FTRACE_EVENT_FL_TRIGGER_COND_BIT, +}; + +/* + * Ftrace event file flags: + * ENABLED - The event is enabled + * RECORDED_CMD - The comms should be recorded at sched_switch + * FILTERED - The event has a filter attached + * NO_SET_FILTER - Set when filter has error and is to be ignored + * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED + * SOFT_DISABLED - When set, do not trace the event (even though its + * tracepoint may be enabled) + * TRIGGER_MODE - When set, invoke the triggers associated with the event + * TRIGGER_COND - When set, one or more triggers has an associated filter + */ +enum { + FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), + FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), + FTRACE_EVENT_FL_FILTERED = (1 << FTRACE_EVENT_FL_FILTERED_BIT), + FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT), + FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), + FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), + FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT), + FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), +}; + +struct ftrace_event_file { + struct list_head list; + struct ftrace_event_call *event_call; + struct event_filter *filter; + struct dentry *dir; + struct trace_array *tr; + struct ftrace_subsystem_dir *system; + struct list_head triggers; + + /* + * 32 bit flags: + * bit 0: enabled + * bit 1: enabled cmd record + * bit 2: enable/disable with the soft disable bit + * bit 3: soft disabled + * bit 4: trigger enabled + * + * Note: The bits must be set atomically to prevent races + * from other writers. Reads of flags do not need to be in + * sync as they occur in critical sections. But the way flags + * is currently used, these changes do not affect the code + * except that when a change is made, it may have a slight + * delay in propagating the changes to other CPUs due to + * caching and such. Which is mostly OK ;-) + */ + unsigned long flags; + atomic_t sm_ref; /* soft-mode reference counter */ + atomic_t tm_ref; /* trigger-mode reference counter */ +}; + +#define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags |= value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + +#define __TRACE_EVENT_PERF_PERM(name, expr...) \ + static int perf_perm_##name(struct ftrace_event_call *tp_event, \ + struct perf_event *p_event) \ + { \ + return ({ expr; }); \ + } \ + static int __init trace_init_perf_perm_##name(void) \ + { \ + event_##name.perf_perm = &perf_perm_##name; \ + return 0; \ + } \ + early_initcall(trace_init_perf_perm_##name); + +#define PERF_MAX_TRACE_SIZE 2048 + +#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ + +enum event_trigger_type { + ETT_NONE = (0), + ETT_TRACE_ONOFF = (1 << 0), + ETT_SNAPSHOT = (1 << 1), + ETT_STACKTRACE = (1 << 2), + ETT_EVENT_ENABLE = (1 << 3), +}; + +extern int filter_match_preds(struct event_filter *filter, void *rec); + +extern int filter_check_discard(struct ftrace_event_file *file, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event); +extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event); +extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file, + void *rec); +extern void event_triggers_post_call(struct ftrace_event_file *file, + enum event_trigger_type tt); + +/** + * ftrace_trigger_soft_disabled - do triggers and test if soft disabled + * @file: The file pointer of the event to test + * + * If any triggers without filters are attached to this event, they + * will be called here. If the event is soft disabled and has no + * triggers that require testing the fields, it will return true, + * otherwise false. + */ +static inline bool +ftrace_trigger_soft_disabled(struct ftrace_event_file *file) +{ + unsigned long eflags = file->flags; + + if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { + if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) + event_triggers_call(file, NULL); + if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) + return true; + } + return false; +} + +/* + * Helper function for event_trigger_unlock_commit{_regs}(). + * If there are event triggers attached to this event that requires + * filtering against its fields, then they wil be called as the + * entry already holds the field information of the current event. + * + * It also checks if the event should be discarded or not. + * It is to be discarded if the event is soft disabled and the + * event was only recorded to process triggers, or if the event + * filter is active and this event did not match the filters. + * + * Returns true if the event is discarded, false otherwise. + */ +static inline bool +__event_trigger_test_discard(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, + enum event_trigger_type *tt) +{ + unsigned long eflags = file->flags; + + if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) + *tt = event_triggers_call(file, entry); + + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags)) + ring_buffer_discard_commit(buffer, event); + else if (!filter_check_discard(file, entry, buffer, event)) + return false; + + return true; +} + +/** + * event_trigger_unlock_commit - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + */ +static inline void +event_trigger_unlock_commit(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + + if (tt) + event_triggers_post_call(file, tt); +} + +/** + * event_trigger_unlock_commit_regs - handle triggers and finish event commit + * @file: The file pointer assoctiated to the event + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself + * @irq_flags: The state of the interrupts at the start of the event + * @pc: The state of the preempt count at the start of the event. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and + * if the event is soft disabled and should be discarded. + * + * Same as event_trigger_unlock_commit() but calls + * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). + */ +static inline void +event_trigger_unlock_commit_regs(struct ftrace_event_file *file, + struct ring_buffer *buffer, + struct ring_buffer_event *event, + void *entry, unsigned long irq_flags, int pc, + struct pt_regs *regs) +{ + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); + + if (tt) + event_triggers_post_call(file, tt); +} + +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + +enum { + FILTER_OTHER = 0, + FILTER_STATIC_STRING, + FILTER_DYN_STRING, + FILTER_PTR_STRING, + FILTER_TRACE_FN, +}; + +extern int trace_event_raw_init(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern int trace_remove_event_call(struct ftrace_event_call *call); + +#define is_signed_type(type) (((type)(-1)) < (type)1) + +int trace_set_clr_event(const char *system, const char *event, int set); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#ifdef CONFIG_PERF_EVENTS +struct perf_event; + +DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); + +extern int perf_trace_init(struct perf_event *event); +extern void perf_trace_destroy(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str); +extern void ftrace_profile_free_filter(struct perf_event *event); +extern void *perf_trace_buf_prepare(int size, unsigned short type, + struct pt_regs **regs, int *rctxp); + +static inline void +perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, + u64 count, struct pt_regs *regs, void *head, + struct task_struct *task) +{ + perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); +} +#endif + +#endif /* _LINUX_FTRACE_EVENT_H */ -- cgit v1.2.3 From 645df987f7c1740bb1ba783ab907001720a20cf7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 4 May 2015 18:12:44 -0400 Subject: tracing: Rename ftrace_print_*() functions ta trace_print_*() The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The functions ftrace_print_*() are not part of the function infrastructure, and the names can be confusing. Rename them to be trace_print_*(). Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f8465d65f3c7..29627cbafdea 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -1,6 +1,6 @@ -#ifndef _LINUX_FTRACE_EVENT_H -#define _LINUX_FTRACE_EVENT_H +#ifndef _LINUX_TRACE_EVENT_H +#define _LINUX_TRACE_EVENT_H #include #include @@ -25,27 +25,27 @@ struct trace_print_flags_u64 { const char *name; }; -const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, - unsigned long flags, - const struct trace_print_flags *flag_array); +const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); -const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array); +const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); #if BITS_PER_LONG == 32 -const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, - unsigned long long val, - const struct trace_print_flags_u64 +const char *trace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 *symbol_array); #endif -const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, - unsigned int bitmask_size); +const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size); -const char *ftrace_print_hex_seq(struct trace_seq *p, - const unsigned char *buf, int len); +const char *trace_print_hex_seq(struct trace_seq *p, + const unsigned char *buf, int len); -const char *ftrace_print_array_seq(struct trace_seq *p, +const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size); -- cgit v1.2.3 From 9023c930902fbbcf0cebf6110828700f792989a4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 May 2015 09:39:12 -0400 Subject: tracing: Rename (un)register_ftrace_event() to (un)register_trace_event() The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The functions (un)register_ftrace_event() is really about trace_events, and the name should be register_trace_event() instead. Also renamed ftrace_event_reg() to trace_event_reg() for the same reason. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 29627cbafdea..99924c07a042 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -132,8 +132,8 @@ struct trace_event { struct trace_event_functions *funcs; }; -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); +extern int register_trace_event(struct trace_event *event); +extern int unregister_trace_event(struct trace_event *event); /* Return values for print_line callback */ enum print_line_t { @@ -216,7 +216,7 @@ struct ftrace_event_class { int (*raw_init)(struct ftrace_event_call *); }; -extern int ftrace_event_reg(struct ftrace_event_call *event, +extern int trace_event_reg(struct ftrace_event_call *event, enum trace_reg type, void *data); struct ftrace_event_buffer { -- cgit v1.2.3 From 7f1d2f8210195c8c309d424a77dbf06a6d2186f4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 May 2015 10:09:53 -0400 Subject: tracing: Rename ftrace_event_file to trace_event_file The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The structure ftrace_event_file is really about trace events and not "ftrace". Rename it to trace_event_file. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 99924c07a042..ae19233c7dd8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -157,11 +157,11 @@ static inline enum print_line_t trace_handle_return(struct trace_seq *s) void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); -struct ftrace_event_file; +struct trace_event_file; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, - struct ftrace_event_file *ftrace_file, + struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc); struct ring_buffer_event * @@ -222,14 +222,14 @@ extern int trace_event_reg(struct ftrace_event_call *event, struct ftrace_event_buffer { struct ring_buffer *buffer; struct ring_buffer_event *event; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; void *entry; unsigned long flags; int pc; }; void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, - struct ftrace_event_file *ftrace_file, + struct trace_event_file *trace_file, unsigned long len); void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); @@ -349,7 +349,7 @@ enum { FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), }; -struct ftrace_event_file { +struct trace_event_file { struct list_head list; struct ftrace_event_call *event_call; struct event_filter *filter; @@ -414,15 +414,15 @@ enum event_trigger_type { extern int filter_match_preds(struct event_filter *filter, void *rec); -extern int filter_check_discard(struct ftrace_event_file *file, void *rec, +extern int filter_check_discard(struct trace_event_file *file, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); -extern enum event_trigger_type event_triggers_call(struct ftrace_event_file *file, +extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); -extern void event_triggers_post_call(struct ftrace_event_file *file, +extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); /** @@ -435,7 +435,7 @@ extern void event_triggers_post_call(struct ftrace_event_file *file, * otherwise false. */ static inline bool -ftrace_trigger_soft_disabled(struct ftrace_event_file *file) +ftrace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; @@ -462,7 +462,7 @@ ftrace_trigger_soft_disabled(struct ftrace_event_file *file) * Returns true if the event is discarded, false otherwise. */ static inline bool -__event_trigger_test_discard(struct ftrace_event_file *file, +__event_trigger_test_discard(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, @@ -495,7 +495,7 @@ __event_trigger_test_discard(struct ftrace_event_file *file, * if the event is soft disabled and should be discarded. */ static inline void -event_trigger_unlock_commit(struct ftrace_event_file *file, +event_trigger_unlock_commit(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc) @@ -526,7 +526,7 @@ event_trigger_unlock_commit(struct ftrace_event_file *file, * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). */ static inline void -event_trigger_unlock_commit_regs(struct ftrace_event_file *file, +event_trigger_unlock_commit_regs(struct trace_event_file *file, struct ring_buffer *buffer, struct ring_buffer_event *event, void *entry, unsigned long irq_flags, int pc, -- cgit v1.2.3 From 2425bcb9240f8c97d793cb31c8e8d8d0a843fa29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 May 2015 11:45:27 -0400 Subject: tracing: Rename ftrace_event_{call,class} to trace_event_{call,class} The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The structures ftrace_event_call and ftrace_event_class have nothing to do with the function hooks, and are really trace_event structures. Rename ftrace_event_* to trace_event_*. Signed-off-by: Steven Rostedt --- include/linux/module.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/syscalls.h | 12 ++++++------ include/linux/trace_events.h | 38 +++++++++++++++++++------------------- 4 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index c883b86ea964..3e0d492682bb 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -336,7 +336,7 @@ struct module { const char **trace_bprintk_fmt_start; #endif #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call **trace_events; + struct trace_event_call **trace_events; unsigned int num_trace_events; struct trace_enum_map **trace_enums; unsigned int num_trace_enums; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..d089d6d58ae0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -483,7 +483,7 @@ struct perf_event { void *overflow_handler_context; #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call *tp_event; + struct trace_event_call *tp_event; struct event_filter *filter; #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38aabe1..d8b06abb264f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -111,14 +111,14 @@ union bpf_attr; #define __SC_STR_ADECL(t, a) #a #define __SC_STR_TDECL(t, a) #t -extern struct ftrace_event_class event_class_syscall_enter; -extern struct ftrace_event_class event_class_syscall_exit; +extern struct trace_event_class event_class_syscall_enter; +extern struct trace_event_class event_class_syscall_exit; extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ event_enter_##sname = { \ .class = &event_class_syscall_enter, \ { \ @@ -128,13 +128,13 @@ extern struct trace_event_functions exit_syscall_print_funcs; .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) \ *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ event_exit_##sname = { \ .class = &event_class_syscall_exit, \ { \ @@ -144,7 +144,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ - static struct ftrace_event_call __used \ + static struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) \ *__event_exit_##sname = &event_exit_##sname; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index ae19233c7dd8..d10ab04a17b2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -200,23 +200,23 @@ enum trace_reg { #endif }; -struct ftrace_event_call; +struct trace_event_call; -struct ftrace_event_class { +struct trace_event_class { const char *system; void *probe; #ifdef CONFIG_PERF_EVENTS void *perf_probe; #endif - int (*reg)(struct ftrace_event_call *event, + int (*reg)(struct trace_event_call *event, enum trace_reg type, void *data); - int (*define_fields)(struct ftrace_event_call *); - struct list_head *(*get_fields)(struct ftrace_event_call *); + int (*define_fields)(struct trace_event_call *); + struct list_head *(*get_fields)(struct trace_event_call *); struct list_head fields; - int (*raw_init)(struct ftrace_event_call *); + int (*raw_init)(struct trace_event_call *); }; -extern int trace_event_reg(struct ftrace_event_call *event, +extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); struct ftrace_event_buffer { @@ -269,9 +269,9 @@ enum { TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), }; -struct ftrace_event_call { +struct trace_event_call { struct list_head list; - struct ftrace_event_class *class; + struct trace_event_class *class; union { char *name; /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ @@ -298,13 +298,13 @@ struct ftrace_event_call { struct hlist_head __percpu *perf_events; struct bpf_prog *prog; - int (*perf_perm)(struct ftrace_event_call *, + int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; static inline const char * -ftrace_event_name(struct ftrace_event_call *call) +ftrace_event_name(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; @@ -351,7 +351,7 @@ enum { struct trace_event_file { struct list_head list; - struct ftrace_event_call *event_call; + struct trace_event_call *event_call; struct event_filter *filter; struct dentry *dir; struct trace_array *tr; @@ -388,7 +388,7 @@ struct trace_event_file { early_initcall(trace_init_flags_##name); #define __TRACE_EVENT_PERF_PERM(name, expr...) \ - static int perf_perm_##name(struct ftrace_event_call *tp_event, \ + static int perf_perm_##name(struct trace_event_call *tp_event, \ struct perf_event *p_event) \ { \ return ({ expr; }); \ @@ -417,7 +417,7 @@ extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_check_discard(struct trace_event_file *file, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); -extern int call_filter_check_discard(struct ftrace_event_call *call, void *rec, +extern int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event); extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, @@ -559,12 +559,12 @@ enum { FILTER_TRACE_FN, }; -extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, const char *type, +extern int trace_event_raw_init(struct trace_event_call *call); +extern int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); -extern int trace_add_event_call(struct ftrace_event_call *call); -extern int trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_add_event_call(struct trace_event_call *call); +extern int trace_remove_event_call(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) @@ -613,4 +613,4 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, } #endif -#endif /* _LINUX_FTRACE_EVENT_H */ +#endif /* _LINUX_TRACE_EVENT_H */ -- cgit v1.2.3 From 3f795dcfc7364cd811c3f6f03d115fcefbbdc1ca Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 May 2015 13:18:46 -0400 Subject: tracing: Rename ftrace_event_buffer to trace_event_buffer. The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The ftrace_event_buffer functions and data structures are for trace_events and not for function hooks. Rename them to trace_event_buffer*. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d10ab04a17b2..a1fa8ebaf684 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -219,7 +219,7 @@ struct trace_event_class { extern int trace_event_reg(struct trace_event_call *event, enum trace_reg type, void *data); -struct ftrace_event_buffer { +struct trace_event_buffer { struct ring_buffer *buffer; struct ring_buffer_event *event; struct trace_event_file *trace_file; @@ -228,11 +228,11 @@ struct ftrace_event_buffer { int pc; }; -void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, +void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len); -void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer); +void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { TRACE_EVENT_FL_FILTERED_BIT, -- cgit v1.2.3 From 892c505aac2bdded3c8ec2ec27abc6d74fd210f5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 May 2015 14:18:11 -0400 Subject: tracing: Rename ftrace_output functions to trace_output The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The ftrace_output_*() and ftrace_raw_output_*() functions represent the trace_event code. Rename them to just trace_output or trace_raw_output. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a1fa8ebaf684..12ca46322a94 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -52,8 +52,8 @@ const char *trace_print_array_seq(struct trace_seq *p, struct trace_iterator; struct trace_event; -int ftrace_raw_output_prep(struct trace_iterator *iter, - struct trace_event *event); +int trace_raw_output_prep(struct trace_iterator *iter, + struct trace_event *event); /* * The trace entry - the most basic unit of tracing. This is what @@ -183,7 +183,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer *buffer, void tracing_record_cmdline(struct task_struct *tsk); -int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); +int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); struct event_filter; -- cgit v1.2.3 From 609a74045238c303bbe9396775eacf5bac1f51cc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 13:44:36 -0400 Subject: tracing: Rename FTRACE_MAX_EVENT to TRACE_EVENT_TYPE_MAX The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. Rename the max trace_event type size to something more descriptive and appropriate. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 12ca46322a94..6f28464be418 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -68,7 +68,7 @@ struct trace_entry { int pid; }; -#define FTRACE_MAX_EVENT \ +#define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) /* -- cgit v1.2.3 From 687fcc4aee4567df14e31e82d6993418b826f408 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 14:20:14 -0400 Subject: tracing: Rename ftrace_event_name() to trace_event_name() The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. ftrace_event_name() returns the name of an event tracepoint, has nothing to do with function tracing. Rename it to trace_event_name(). Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 6f28464be418..15617798849c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -304,7 +304,7 @@ struct trace_event_call { }; static inline const char * -ftrace_event_name(struct trace_event_call *call) +trace_event_name(struct trace_event_call *call) { if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; -- cgit v1.2.3 From 7967b3e0c40ff72fb2cf44d3b50e2cb388ef6c67 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 14:59:40 -0400 Subject: tracing: Rename struct ftrace_subsystem_dir to trace_subsystem_dir The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The structure ftrace_subsystem_dir holds the information about trace event subsystems. It should not be named ftrace, rename it to trace_subsystem_dir. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 15617798849c..d4ad58ec684a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -313,7 +313,7 @@ trace_event_name(struct trace_event_call *call) } struct trace_array; -struct ftrace_subsystem_dir; +struct trace_subsystem_dir; enum { FTRACE_EVENT_FL_ENABLED_BIT, @@ -355,7 +355,7 @@ struct trace_event_file { struct event_filter *filter; struct dentry *dir; struct trace_array *tr; - struct ftrace_subsystem_dir *system; + struct trace_subsystem_dir *system; struct list_head triggers; /* -- cgit v1.2.3 From 1bd758eb1cab2fa5b71a23f9e5d3c8076f4ed650 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:07 +0200 Subject: net: change name of flow_dissector header to match the .c file name add couple of empty lines on the way. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..e35c2b13d434 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include /* A. Checksumming of received packets by device. * -- cgit v1.2.3 From 10b89ee43e849544eddfe34e535341fc077464ec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:09 +0200 Subject: net: move *skb_get_poff declarations into correct header Since these functions are defined in flow_dissector.c, move header declarations from skbuff.h into flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e35c2b13d434..17607ab9e7a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3424,10 +3424,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); - /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check -- cgit v1.2.3 From 9c684b5083bc191c4b7b189c73d75587e167a474 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:11 +0200 Subject: net: move __skb_get_hash function declaration to flow_dissector.h Since the definition of the function is in flow_dissector.c, it makes sense to have the declaration in flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17607ab9e7a2..ae2d1b7769d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb->hash = hash; } -void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) -- cgit v1.2.3 From 5605c76240aadc823e3d46ac9afde2f26fbcf019 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:12 +0200 Subject: net: move __skb_tx_hash to dev.c __skb_tx_hash function has no relation to flow_dissect so just move it to dev.c Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/skbuff.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd0951c1893d..d3ed01c18247 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues); + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ae2d1b7769d8..b01c7fba7c17 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3299,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM -- cgit v1.2.3 From 06635a35d13d42b95422bba6633f175245cc644e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:16 +0200 Subject: flow_dissect: use programable dissector in skb_flow_dissect and friends Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b01c7fba7c17..f83aa6568cbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1935,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys)) + skb_set_transport_header(skb, keys.basic.thoff); else skb_set_transport_header(skb, offset_hint); } -- cgit v1.2.3 From 5d6ad960a71f0b36d95d74ef93285733b9f62f59 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 15:12:33 -0400 Subject: tracing: Rename FTRACE_EVENT_FL_* flags to EVENT_FILE_FL_* The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The FTRACE_EVENT_FL_* flags are flags to do with the trace_event files in the tracefs directory. They are not related to function tracing. Rename them to a more descriptive name. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d4ad58ec684a..a46c138b2eea 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -250,11 +250,11 @@ enum { * FILTERED - The event has a filter attached * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored - * IGNORE_ENABLE - For ftrace internal events, do not enable with debugfs file + * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file * WAS_ENABLED - Set and stays set when an event was ever enabled * (used for module unloading, if a module event is enabled, * it is best to clear the buffers that used it). - * USE_CALL_FILTER - For ftrace internal events, don't use file filter + * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe */ @@ -286,7 +286,7 @@ struct trace_event_call { * bit 0: filter_active * bit 1: allow trace by non root (cap any) * bit 2: failed to apply filter - * bit 3: ftrace internal event (do not enable) + * bit 3: trace internal event (do not enable) * bit 4: Event was enabled by module * bit 5: use call filter rather than file filter * bit 6: Event is a tracepoint @@ -316,18 +316,18 @@ struct trace_array; struct trace_subsystem_dir; enum { - FTRACE_EVENT_FL_ENABLED_BIT, - FTRACE_EVENT_FL_RECORDED_CMD_BIT, - FTRACE_EVENT_FL_FILTERED_BIT, - FTRACE_EVENT_FL_NO_SET_FILTER_BIT, - FTRACE_EVENT_FL_SOFT_MODE_BIT, - FTRACE_EVENT_FL_SOFT_DISABLED_BIT, - FTRACE_EVENT_FL_TRIGGER_MODE_BIT, - FTRACE_EVENT_FL_TRIGGER_COND_BIT, + EVENT_FILE_FL_ENABLED_BIT, + EVENT_FILE_FL_RECORDED_CMD_BIT, + EVENT_FILE_FL_FILTERED_BIT, + EVENT_FILE_FL_NO_SET_FILTER_BIT, + EVENT_FILE_FL_SOFT_MODE_BIT, + EVENT_FILE_FL_SOFT_DISABLED_BIT, + EVENT_FILE_FL_TRIGGER_MODE_BIT, + EVENT_FILE_FL_TRIGGER_COND_BIT, }; /* - * Ftrace event file flags: + * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch * FILTERED - The event has a filter attached @@ -339,14 +339,14 @@ enum { * TRIGGER_COND - When set, one or more triggers has an associated filter */ enum { - FTRACE_EVENT_FL_ENABLED = (1 << FTRACE_EVENT_FL_ENABLED_BIT), - FTRACE_EVENT_FL_RECORDED_CMD = (1 << FTRACE_EVENT_FL_RECORDED_CMD_BIT), - FTRACE_EVENT_FL_FILTERED = (1 << FTRACE_EVENT_FL_FILTERED_BIT), - FTRACE_EVENT_FL_NO_SET_FILTER = (1 << FTRACE_EVENT_FL_NO_SET_FILTER_BIT), - FTRACE_EVENT_FL_SOFT_MODE = (1 << FTRACE_EVENT_FL_SOFT_MODE_BIT), - FTRACE_EVENT_FL_SOFT_DISABLED = (1 << FTRACE_EVENT_FL_SOFT_DISABLED_BIT), - FTRACE_EVENT_FL_TRIGGER_MODE = (1 << FTRACE_EVENT_FL_TRIGGER_MODE_BIT), - FTRACE_EVENT_FL_TRIGGER_COND = (1 << FTRACE_EVENT_FL_TRIGGER_COND_BIT), + EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), + EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), + EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), + EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), + EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), + EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), + EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), + EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), }; struct trace_event_file { @@ -439,10 +439,10 @@ ftrace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) { - if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE) + if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { + if (eflags & EVENT_FILE_FL_TRIGGER_MODE) event_triggers_call(file, NULL); - if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED) + if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; } return false; @@ -470,10 +470,10 @@ __event_trigger_test_discard(struct trace_event_file *file, { unsigned long eflags = file->flags; - if (eflags & FTRACE_EVENT_FL_TRIGGER_COND) + if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry); - if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags)) + if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) ring_buffer_discard_commit(buffer, event); else if (!filter_check_discard(file, entry, buffer, event)) return false; -- cgit v1.2.3 From 09a5059aa1a2cbf8c8993e61b013cc83a0dd5833 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 May 2015 15:21:25 -0400 Subject: tracing: Rename ftrace_trigger_soft_disabled() to trace_trigger_soft_disabled() The name "ftrace" really refers to the function hook infrastructure. It is not about the trace_events. The ftrace_trigger_soft_disabled() tests if a trace_event is soft disabled (called but not traced), and returns true if it is. It has nothing to do with function tracing and should be renamed. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a46c138b2eea..1063c850dbab 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -426,7 +426,7 @@ extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); /** - * ftrace_trigger_soft_disabled - do triggers and test if soft disabled + * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test * * If any triggers without filters are attached to this event, they @@ -435,7 +435,7 @@ extern void event_triggers_post_call(struct trace_event_file *file, * otherwise false. */ static inline bool -ftrace_trigger_soft_disabled(struct trace_event_file *file) +trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; -- cgit v1.2.3 From f0b5e8a42f37a880b8467e59dc814f4f21581d3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 12 May 2015 20:28:07 +0200 Subject: net: kill useless net_*_ingress_queue() definitions when NET_CLS_ACT is unset This fixes 4577139b2dabf589 ("net: use jump label patching for ingress qdisc in __netif_receive_skb_core"). The only client of this is sch_ingress and it depends on NET_CLS_ACT. So there is no way these definition can be of any help. Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7b8e260c4a27..bd29ab4b0941 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -82,14 +82,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_CLS_ACT void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); -#else -static inline void net_inc_ingress_queue(void) -{ -} - -static inline void net_dec_ingress_queue(void) -{ -} #endif extern void rtnetlink_init(void); -- cgit v1.2.3 From 25956b6612601cf36022392ffa83f6bf97939bcd Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Mon, 11 May 2015 12:17:13 +0800 Subject: ACPI / processor: Introduce invalid_logical_cpuid() In ACPI processor drivers, we use direct comparisons of cpu logical id with -1 which are error prone in case logical cpuid is accidentally assinged an error code and prevents us from returning an error-encoding cpuid directly in some cases. So introduce invalid_logical_cpuid() to identify cpu with invalid logical cpu num, then it will be used to replace the direct comparisons with -1. Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..913b49f9a6e6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -158,6 +158,11 @@ typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) #endif +static inline bool invalid_logical_cpuid(u32 cpuid) +{ + return (int)cpuid < 0; +} + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); -- cgit v1.2.3 From ddcc18f5bdd1aafd457032ec693fd9d0af764d61 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 13 May 2015 16:19:30 +0800 Subject: ACPI / processor: Introduce invalid_phys_cpuid() Introduce invalid_phys_cpuid() to identify cpu with invalid physical ID, then used it as replacement of the direct comparisons with PHYS_CPUID_INVALID. Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 913b49f9a6e6..90e4ed1eb191 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -163,6 +163,11 @@ static inline bool invalid_logical_cpuid(u32 cpuid) return (int)cpuid < 0; } +static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) +{ + return phys_id == PHYS_CPUID_INVALID; +} + #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); -- cgit v1.2.3 From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..2d24eeb8e59c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); -- cgit v1.2.3 From 87d5c18ce1f41a2410d4fb26d5d68c55867450f5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:34 +0200 Subject: netfilter: cleanup struct nf_hook_ops indentation Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 63560d0a8dfe..83be4a3cec98 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,16 +79,16 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; + struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - void *priv; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { -- cgit v1.2.3 From f7191483461ce2ae579b6f7227fa7ce49e006656 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:35 +0200 Subject: netfilter: add hook list to nf_hook_state Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 83be4a3cec98..388ed1952242 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,10 +54,12 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, + struct list_head *hook_list, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->hook_list = hook_list; p->okfn = okfn; } @@ -166,8 +169,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, sk, okfn); + nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, + pf, indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From b8d0aad0c77f488d1d51a02d871a5cbc2d8032b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:36 +0200 Subject: netfilter: add nf_hook_list_active() In preparation to have netfilter ingress per-device hook list. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 388ed1952242..49d00638d1fa 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -134,26 +134,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) return static_key_false(&nf_hooks_needed[pf][hook]); - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #else -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #endif +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook); +} + int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook - * + * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. -- cgit v1.2.3 From 1cf51900f8545b358b5deaacfda348d990f671db Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:37 +0200 Subject: net: add CONFIG_NET_INGRESS to enable ingress filtering This new config switch enables the ingress filtering infrastructure that is controlled through the ingress_needed static key. This prepares the introduction of the Netfilter ingress hook that resides under this unique static key. Note that CONFIG_SCH_INGRESS automatically selects this, that should be no problem since this also depends on CONFIG_NET_CLS_ACT. Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bd29ab4b0941..a2324fb45cf4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif -- cgit v1.2.3 From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:38 +0200 Subject: netfilter: add netfilter ingress hook after handle_ing() under unique static key This patch adds the Netfilter ingress hook just after the existing tc ingress hook, that seems to be the consensus solution for this. Note that the Netfilter hook resides under the global static key that enables ingress filtering. Nonetheless, Netfilter still also has its own static key for minimal impact on the existing handle_ing(). * Without this patch: Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags) 16086246pps 7721Mb/sec (7721398080bps) errors: 100000000 42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch: Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags) 16090536pps 7723Mb/sec (7723457280bps) errors: 100000000 41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb * Without this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags) 10788648pps 5178Mb/sec (5178551040bps) errors: 100000000 40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.77% kpktgend_0 [cls_u32] [k] u32_classify 5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker 3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify 2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags) 10743194pps 5156Mb/sec (5156733120bps) errors: 100000000 42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.70% kpktgend_0 [cls_u32] [k] u32_classify 5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker 2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify 1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk Note that the results are very similar before and after. I can see gcc gets the code under the ingress static key out of the hot path. Then, on that cold branch, it generates the code to accomodate the netfilter ingress static key. My explanation for this is that this reduces the pressure on the instruction cache for non-users as the new code is out of the hot path, and it comes with minimal impact for tc ingress users. Using gcc version 4.8.4 on: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 [...] L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/netfilter.h | 1 + include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 include/linux/netfilter_ingress.h (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d3ed01c18247..51f8d2f5dc3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ struct net_device { struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; +#ifdef CONFIG_NETFILTER_INGRESS + struct list_head nf_hooks_ingress; +#endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 49d00638d1fa..f5ff5d156da8 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -86,6 +86,7 @@ struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; + struct net_device *dev; struct module *owner; void *priv; u_int8_t pf; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..cb0727fe2b3d --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,41 @@ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return nf_hook_list_active(&skb->dev->nf_hooks_ingress, + NFPROTO_NETDEV, NF_NETDEV_INGRESS); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_state state; + + nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, + skb->dev, NULL, NULL); + return nf_hook_slow(skb, &state); +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->nf_hooks_ingress); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ -- cgit v1.2.3 From af6c235d1a5c112964c3029eb0ed4b52c7aa33bf Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 13 May 2015 13:03:21 +0200 Subject: gpio: discourage passing base to gpio_chip Passing a fixed base in struct gpio_chip is done for legacy systems that cannot handle dynamic allocation. Discourage this behaviour in the kerneldoc. Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 96a678842cde..cc7ec129b329 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -42,8 +42,12 @@ struct seq_file; * @dbg_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra * state (such as pullup/pulldown configuration). - * @base: identifies the first GPIO number handled by this chip; or, if - * negative during registration, requests dynamic ID allocation. + * @base: identifies the first GPIO number handled by this chip; + * or, if negative during registration, requests dynamic ID allocation. + * DEPRECATION: providing anything non-negative and nailing the base + * base offset of GPIO chips is deprecated. Please pass -1 as base to + * let gpiolib select the chip base in all possible cases. We want to + * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO * handled is (base + ngpio - 1). * @desc: array of ngpio descriptors. Private. -- cgit v1.2.3 From 7fb48c5bc3100f7674a8e26f42c1518196500728 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 May 2015 22:05:28 +0200 Subject: netfilter: bridge: neigh_head and physoutdev can't be used at same time The neigh_header is only needed when we detect DNAT after prerouting and neigh cache didn't have a mac address for us. The output port has not been chosen yet so we can re-use the storage area, bringing struct size down to 32 bytes on x86_64. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..3d932e64125a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -170,12 +170,14 @@ struct nf_bridge_info { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE - } orig_proto; + } orig_proto:8; bool pkt_otherhost; unsigned int mask; struct net_device *physindev; - struct net_device *physoutdev; - char neigh_header[8]; + union { + struct net_device *physoutdev; + char neigh_header[8]; + }; }; #endif -- cgit v1.2.3 From a3b1c1eb50f9b3e0c73c37157d0c61b2e90ae580 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 6 May 2015 16:28:57 +0200 Subject: netfilter: ipset: deinline ip_set_put_extensions() On x86 allyesconfig build: The function compiles to 489 bytes of machine code. It has 25 callsites. text data bss dec hex filename 82441375 22255384 20627456 125324215 7784bb7 vmlinux.before 82434909 22255384 20627456 125317749 7783275 vmlinux Signed-off-by: Denys Vlasenko CC: Jozsef Kadlecsik CC: Eric W. Biederman CC: David S. Miller CC: Jan Engelhardt CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f88be7258e5f..ffdfdc24952a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -533,29 +533,9 @@ bitmap_bytes(u32 a, u32 b) #include #include -static inline int +int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active) -{ - if (SET_WITH_TIMEOUT(set)) { - unsigned long *timeout = ext_timeout(e, set); - - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(active ? ip_set_timeout_get(timeout) - : *timeout))) - return -EMSGSIZE; - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - return -EMSGSIZE; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) - return -EMSGSIZE; - if (SET_WITH_SKBINFO(set) && - ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) - return -EMSGSIZE; - return 0; -} + const void *e, bool active); #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ -- cgit v1.2.3 From 922f2dd1b65a888e34c472979460dc23211750a2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 May 2015 10:33:24 -0700 Subject: net: phy: Add phy_ignore_ta_mask to account for broken turn-around Some PHY devices/switches will not release the turn-around line as they should do at the end of a MDIO transaction. To help with such situations, allow MDIO bus drivers to be made aware of such restrictions. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 685809835b5c..701c7a3946e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -181,6 +181,9 @@ struct mii_bus { /* PHY addresses to be ignored when probing */ u32 phy_mask; + /* PHY addresses to ignore the TA/read failure */ + u32 phy_ignore_ta_mask; + /* * Pointer to an array of interrupts, each PHY's * interrupt at the index matching its address -- cgit v1.2.3 From 8c8a457a60050d5922676f81913d87e4af6fd97b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 14 May 2015 14:31:01 +0300 Subject: sched: Remove redundant #ifdef Two adjacent members in task_struct were guarded by the same #define, so we can merge the two blocks. Signed-off-by: Nikolay Borisov Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431603061-29408-1-git-send-email-kernel@kyup.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0eceeec5a01a..5f8defa155cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1398,8 +1398,6 @@ struct task_struct { int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU -- cgit v1.2.3 From faad38492814112e3e7ce94d90123bbe301fff33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 May 2015 01:18:03 +0200 Subject: sched / idle: Call idle_set_state() from cpuidle_enter_state() Introduce a wrapper function around idle_set_state() called sched_idle_set_state() that will pass this_rq() to it as the first argument and make cpuidle_enter_state() call the new function before and after entering the target state. At the same time, remove direct invocations of idle_set_state() from call_cpuidle(). This will allow the invocation of default_idle_call() to be moved from call_cpuidle() to cpuidle_enter_state() safely and call_cpuidle() to be simplified a bit as a result. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy Tested-by: Preeti U Murthy Tested-by: Sudeep Holla Acked-by: Kevin Hilman --- include/linux/cpuidle.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 9c5e89254796..301eaaab40e3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -200,6 +200,9 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } #endif +/* kernel/sched/idle.c */ +extern void sched_idle_set_state(struct cpuidle_state *idle_state); + #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a); #else -- cgit v1.2.3 From 827a5aefc542b8fb17c00de06118e5cd0e3800f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 May 2015 01:18:46 +0200 Subject: sched / idle: Call default_idle_call() from cpuidle_enter_state() The check of the cpuidle_enter() return value against -EBUSY made in call_cpuidle() will not be necessary any more if cpuidle_enter_state() calls default_idle_call() directly when it is about to return -EBUSY, so make that happen and eliminate the check. Signed-off-by: Rafael J. Wysocki Reviewed-by: Preeti U Murthy Tested-by: Preeti U Murthy Tested-by: Sudeep Holla Acked-by: Kevin Hilman --- include/linux/cpuidle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 301eaaab40e3..c7a63643658e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -202,6 +202,7 @@ static inline struct cpuidle_driver *cpuidle_get_cpu_driver( /* kernel/sched/idle.c */ extern void sched_idle_set_state(struct cpuidle_state *idle_state); +extern void default_idle_call(void); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a); -- cgit v1.2.3 From 4573237b01221881702fbe6655f3ae5135be1c18 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 12 May 2015 12:22:34 +0530 Subject: cpufreq: Manage governor usage history with 'policy->last_governor' History of which governor was used last is common to all CPUs within a policy and maintaining it per-cpu isn't the best approach for sure. Apart from wasting memory, this also increases the complexity of managing this data structure as it has to be updated for all CPUs. To make that somewhat simpler, lets store this information in a new field 'last_governor' in struct cpufreq_policy and update it on removal of last cpu of a policy. As a side-effect it also solves an old problem, consider a system with two clusters 0 & 1. And there is one policy per cluster. Cluster 0: CPU0 and 1. Cluster 1: CPU2 and 3. - CPU2 is first brought online, and governor is set to performance (default as cpufreq_cpu_governor wasn't set). - Governor is changed to ondemand. - CPU2 is taken offline and cpufreq_cpu_governor is updated for CPU2. - CPU3 is brought online. - Because cpufreq_cpu_governor wasn't set for CPU3, the default governor performance is picked for CPU3. This patch fixes the bug as we now have a single variable to update for policy. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2ee4888c1f47..48e37c07eb84 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,6 +80,7 @@ struct cpufreq_policy { struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ + char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ -- cgit v1.2.3 From 8f4fc071b1926d0b20336e2b3f8ab85c94c734c5 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 May 2015 15:16:55 -0700 Subject: gfp: add __GFP_NOACCOUNT Not all kmem allocations should be accounted to memcg. The following patch gives an example when accounting of a certain type of allocations to memcg can effectively result in a memory leak. This patch adds the __GFP_NOACCOUNT flag which if passed to kmalloc and friends will force the allocation to go through the root cgroup. It will be used by the next patch. Note, since in case of kmemleak enabled each kmalloc implies yet another allocation from the kmemleak_object cache, we add __GFP_NOACCOUNT to gfp_kmemleak_mask. Alternatively, we could introduce a per kmem cache flag disabling accounting for all allocations of a particular kind, but (a) we would not be able to bypass accounting for kmalloc then and (b) a kmem cache with this flag set could not be merged with a kmem cache without this flag, which would increase the number of global caches and therefore fragmentation even if the memory cgroup controller is not used. Despite its generic name, currently __GFP_NOACCOUNT disables accounting only for kmem allocations while user page allocations are always charged. To catch abusing of this flag, a warning is issued on an attempt of passing it to mem_cgroup_try_charge. Signed-off-by: Vladimir Davydov Cc: Tejun Heo Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Greg Thelen Cc: Greg Kroah-Hartman Cc: [4.0.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ include/linux/memcontrol.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a9373e61e8..15928f0647e4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,6 +30,7 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u +#define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -87,6 +88,7 @@ struct vm_area_struct; #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */ #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ +#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 72dff5fb0d0c..6c8918114804 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -463,6 +463,8 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) if (!memcg_kmem_enabled()) return true; + if (gfp & __GFP_NOACCOUNT) + return true; /* * __GFP_NOFAIL allocations will move on even if charging is not * possible. Therefore we don't even try, and have this allocation @@ -522,6 +524,8 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { if (!memcg_kmem_enabled()) return cachep; + if (gfp & __GFP_NOACCOUNT) + return cachep; if (gfp & __GFP_NOFAIL) return cachep; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) -- cgit v1.2.3 From 929aa5b250bfc59aca492d3213c7f3a53e2a5247 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 14 May 2015 15:17:01 -0700 Subject: uidgid: make uid_valid and gid_valid work with !CONFIG_MULTIUSER {u,g}id_valid call {u,g}id_eq, which calls __k{u,g}id_val on both arguments and compares. With !CONFIG_MULTIUSER, __k{u,g}id_val return a constant 0, which makes {u,g}id_valid always return false. Change {u,g}id_valid to compare their argument against -1 instead. That produces identical results in the normal CONFIG_MULTIUSER=y case, but with !CONFIG_MULTIUSER will make {u,g}id_valid constant-fold into "return true;" rather than "return false;". This fixes uses of devpts without CONFIG_MULTIUSER. Signed-off-by: Josh Triplett Reported-by: Fengguang Wu , Cc: Peter Hurley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uidgid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index 0ee05da38899..03835522dfcb 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -109,12 +109,12 @@ static inline bool gid_lte(kgid_t left, kgid_t right) static inline bool uid_valid(kuid_t uid) { - return !uid_eq(uid, INVALID_UID); + return __kuid_val(uid) != (uid_t) -1; } static inline bool gid_valid(kgid_t gid) { - return !gid_eq(gid, INVALID_GID); + return __kgid_val(gid) != (gid_t) -1; } #ifdef CONFIG_USER_NS -- cgit v1.2.3 From a4afd37b26f4b9f640310a89b7f8d176ae3460b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 May 2015 13:12:43 +0200 Subject: test_bpf: add tests related to BPF_MAXINSNS Couple of torture test cases related to the bug fixed in 0b59d8806a31 ("ARM: net: delegate filter to kernel interpreter when imm_offset() return value can't fit into 12bits."). I've added a helper to allocate and fill the insn space. Output on x86_64 from my laptop: test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:0 7 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:0 8 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:0 11553 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:0 9 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:0 20329 20398 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:0 32178 32475 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:0 10518 PASS test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:1 4 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:1 4 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:1 1625 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:1 8 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:1 3301 3174 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:1 24107 23491 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:1 8651 PASS Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Nicolas Schichan Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ce1d72d34382..200be4a74a33 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,14 @@ struct bpf_prog_aux; .off = 0, \ .imm = 0 }) +/* Internal classic blocks for direct assignment */ + +#define __BPF_STMT(CODE, K) \ + ((struct sock_filter) BPF_STMT(CODE, K)) + +#define __BPF_JUMP(CODE, K, JT, JF) \ + ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) + #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ -- cgit v1.2.3 From ef7f3a5c7149ad2dbd1d8a71d0aa88a02d1dbcb8 Mon Sep 17 00:00:00 2001 From: Bert Vermeulen Date: Wed, 13 May 2015 13:35:39 +0200 Subject: mdio-gpio: Propagate mii_bus.phy_ignore_ta_mask This also changes mii_bus.phy_mask to u32 for consistency. Signed-off-by: Bert Vermeulen Signed-off-by: David S. Miller --- include/linux/mdio-gpio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 66c30a763b10..11f00cdabe3d 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -23,7 +23,8 @@ struct mdio_gpio_platform_data { bool mdio_active_low; bool mdo_active_low; - unsigned int phy_mask; + u32 phy_mask; + u32 phy_ignore_ta_mask; int irqs[PHY_MAX_ADDR]; /* reset callback */ int (*reset)(struct mii_bus *bus); -- cgit v1.2.3 From 8fa9dd24667f2d6997ec21341019657342859d31 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 23 Mar 2015 13:37:40 +1100 Subject: VFS/namei: make the use of touch_atime() in get_link() RCU-safe. touch_atime is not RCU-safe, and so cannot be called on an RCU walk. However, in situations where RCU-walk makes a difference, the symlink will likely to accessed much more often than it is useful to update the atime. So split out the test of "Does the atime actually need to be updated" into atime_needs_update(), and have get_link() unlazy if it finds that it will need to do that update. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8f738512c874..1426c435d455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1880,6 +1880,7 @@ enum file_time_flags { S_VERSION = 8, }; +extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From 89076bc31950eee576ecc06460c23466e2d50939 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 May 2015 08:29:38 -0400 Subject: get rid of assorted nameidata-related debris pointless forward declarations, stale comments Signed-off-by: Al Viro --- include/linux/fs.h | 1 - include/linux/namei.h | 1 - include/linux/sched.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1426c435d455..b577e801b4af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -38,7 +38,6 @@ struct backing_dev_info; struct export_operations; struct hd_geometry; struct iovec; -struct nameidata; struct kiocb; struct kobject; struct pipe_inode_info; diff --git a/include/linux/namei.h b/include/linux/namei.h index d756304aa09b..1208e489f83e 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -7,7 +7,6 @@ #include struct vfsmount; -struct nameidata; enum { MAX_NESTED_LINKS = 8 }; diff --git a/include/linux/sched.h b/include/linux/sched.h index f6c9b69d66f2..a1158c954f0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -132,6 +132,7 @@ struct fs_struct; struct perf_event_context; struct blk_plug; struct filename; +struct nameidata; #define VMACACHE_BITS 2 #define VMACACHE_SIZE (1U << VMACACHE_BITS) -- cgit v1.2.3 From b853a16176cf3e02c57e215743015614152c2428 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 13 May 2015 09:12:02 -0400 Subject: turn user_{path_at,path,lpath,path_dir}() into static inlines Signed-off-by: Al Viro --- include/linux/namei.h | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 1208e489f83e..d8c6334cd150 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -1,12 +1,10 @@ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H -#include -#include -#include +#include #include - -struct vfsmount; +#include +#include enum { MAX_NESTED_LINKS = 8 }; @@ -46,13 +44,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 -extern int user_path_at(int, const char __user *, unsigned, struct path *); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); -#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) -#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path) -#define user_path_dir(name, path) \ - user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path) +static inline int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) +{ + return user_path_at_empty(dfd, name, flags, path, NULL); +} + +static inline int user_path(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL); +} + +static inline int user_lpath(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, 0, path, NULL); +} + +static inline int user_path_dir(const char __user *name, struct path *path) +{ + return user_path_at_empty(AT_FDCWD, name, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL); +} extern int kern_path(const char *, unsigned, struct path *); -- cgit v1.2.3 From 55917a21d0cc012bb6073bb05bb768fd51d8e237 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 May 2015 14:57:23 +0200 Subject: netfilter: x_tables: add context to know if extension runs from nft_compat Currently, we have four xtables extensions that cannot be used from the xt over nft compat layer. The problem is that they need real access to the full blown xt_entry to validate that the rule comes with the right dependencies. This check was introduced to overcome the lack of sufficient userspace dependency validation in iptables. To resolve this problem, this patch introduces a new field to the xt_tgchk_param structure that tell us if the extension is run from nft_compat context. The three affected extensions are: 1) CLUSTERIP, this target has been superseded by xt_cluster. So just bail out by returning -EINVAL. 2) TCPMSS. Relax the checking when used from nft_compat. If used with the wrong configuration, it will corrupt !syn packets by adding TCP MSS option. 3) ebt_stp. Relax the check to make sure it uses the reserved destination MAC address for STP. Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez --- include/linux/netfilter/x_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a3e215bb0241..09f38206c18f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -62,6 +62,7 @@ struct xt_mtchk_param { void *matchinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /** @@ -92,6 +93,7 @@ struct xt_tgchk_param { void *targinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /* Target destructor parameters */ -- cgit v1.2.3 From 07ee0722bf941960fb3888f9c9b5839473372fd1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 May 2015 11:30:47 +0800 Subject: rhashtable: Add cap on number of elements in hash table We currently have no limit on the number of elements in a hash table. This is a problem because some users (tipc) set a ceiling on the maximum table size and when that is reached the hash table may degenerate. Others may encounter OOM when growing and if we allow insertions when that happens the hash table perofrmance may also suffer. This patch adds a new paramater insecure_max_entries which becomes the cap on the table. If unset it defaults to max_size * 2. If it is also zero it means that there is no cap on the number of elements in the table. However, the table will grow whenever the utilisation hits 100% and if that growth fails, you will get ENOMEM on insertion. As allowing oversubscription is potentially dangerous, the name contains the word insecure. Note that the cap is not a hard limit. This is done for performance reasons as enforcing a hard limit will result in use of atomic ops that are heavier than the ones we currently use. The reasoning is that we're only guarding against a gross over- subscription of the table, rather than a small breach of the limit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index dbcbcc59aa92..843ceca9a21e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -17,6 +17,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -100,6 +101,7 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed + * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -115,6 +117,7 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; + unsigned int insecure_max_entries; unsigned int max_size; unsigned int min_size; u32 nulls_base; @@ -286,6 +289,18 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, (!ht->p.max_size || tbl->size < ht->p.max_size); } +/** + * rht_grow_above_max - returns true if table is above maximum + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_max(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return ht->p.insecure_max_entries && + atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -589,6 +604,10 @@ restart: goto out; } + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); -- cgit v1.2.3 From 3f7f642b9bc46453e1435e8b67f1c4f7949be7ff Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 13 May 2015 12:26:42 +0200 Subject: iio: core: add high pass filter attributes Add a high pass filter attribute for measurements (like the existing low pass) Also add both high and low pass attributes for events. Signed-off-by: Martin Fuzzey Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 1 + include/linux/iio/types.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 058441da4984..f79148261d16 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -32,6 +32,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW, IIO_CHAN_INFO_AVERAGE_RAW, IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY, + IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY, IIO_CHAN_INFO_SAMP_FREQ, IIO_CHAN_INFO_FREQUENCY, IIO_CHAN_INFO_PHASE, diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 942b6de68e2f..32b579525004 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -17,6 +17,8 @@ enum iio_event_info { IIO_EV_INFO_VALUE, IIO_EV_INFO_HYSTERESIS, IIO_EV_INFO_PERIOD, + IIO_EV_INFO_HIGH_PASS_FILTER_3DB, + IIO_EV_INFO_LOW_PASS_FILTER_3DB, }; #define IIO_VAL_INT 1 -- cgit v1.2.3 From c91d46065209bfe6124396fc409765ced0601b59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 05:48:07 -0700 Subject: net: fix two sparse errors First one in __skb_checksum_validate_complete() fixes the following (and other callers) make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/tcp_ipv4.o CHECK net/ipv4/tcp_ipv4.c include/linux/skbuff.h:3052:24: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3052:24: expected restricted __sum16 include/linux/skbuff.h:3052:24: got int Second is fixing gso_make_checksum() : CHECK net/ipv4/gre_offload.c include/linux/skbuff.h:3360:14: warning: incorrect type in assignment (different base types) include/linux/skbuff.h:3360:14: expected unsigned short [unsigned] [usertype] csum include/linux/skbuff.h:3360:14: got restricted __sum16 include/linux/skbuff.h:3365:16: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3365:16: expected restricted __sum16 include/linux/skbuff.h:3365:16: got unsigned short [unsigned] [usertype] csum Fixes: 5a21232983aa7 ("net: Support for csum_bad in skbuff") Fixes: 7e2b10c1e52ca ("net: Support for multiple checksums with gso") Signed-off-by: Eric Dumazet CC: Tom Herbert Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f83aa6568cbf..b57eebfb67e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3051,7 +3051,7 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, } } else if (skb->csum_bad) { /* ip_summed == CHECKSUM_NONE in this case */ - return 1; + return (__force __sum16)1; } skb->csum = psum; @@ -3353,15 +3353,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __u16 csum; + skb_transport_offset(skb); + __wsum partial; - csum = csum_fold(csum_partial(skb_transport_header(skb), - plen, skb->csum)); + partial = csum_partial(skb_transport_header(skb), plen, skb->csum); skb->csum = res; SKB_GSO_CB(skb)->csum_start -= plen; - return csum; + return csum_fold(partial); } static inline bool skb_is_gso(const struct sk_buff *skb) -- cgit v1.2.3 From e1031dc1f7ba5c8724ba211062134076df292791 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 May 2015 17:38:07 +0200 Subject: dmaengine: Support different source and destination stride In interleaved mode, we can expect to have different source and destination strides. Add support for such case to dmaengine. Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..5d63acb09813 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -122,10 +122,18 @@ enum dma_transfer_direction { * chunk and before first src/dst address for next chunk. * Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false. * Ignored for src(assumed 0), if src_inc is true and src_sgl is false. + * @dst_icg: Number of bytes to jump after last dst address of this + * chunk and before the first dst address for next chunk. + * Ignored if dst_inc is true and dst_sgl is false. + * @src_icg: Number of bytes to jump after last src address of this + * chunk and before the first src address for next chunk. + * Ignored if src_inc is true and src_sgl is false. */ struct data_chunk { size_t size; size_t icg; + size_t dst_icg; + size_t src_icg; }; /** -- cgit v1.2.3 From 4cfafd3082afc707653aeb82e9f8e7b596fbbfd6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 May 2015 12:23:11 +0200 Subject: sched,perf: Fix periodic timers In the below two commits (see Fixes) we have periodic timers that can stop themselves when they're no longer required, but need to be (re)-started when their idle condition changes. Further complications is that we want the timer handler to always do the forward such that it will always correctly deal with the overruns, and we do not want to race such that the handler has already decided to stop, but the (external) restart sees the timer still active and we end up with a 'lost' timer. The problem with the current code is that the re-start can come before the callback does the forward, at which point the forward from the callback will WARN about forwarding an enqueued timer. Now, conceptually its easy to detect if you're before or after the fwd by comparing the expiration time against the current time. Of course, that's expensive (and racy) because we don't have the current time. Alternatively one could cache this state inside the timer, but then everybody pays the overhead of maintaining this extra state, and that is undesired. The only other option that I could see is the external timer_active variable, which I tried to kill before. I would love a nicer interface for this seemingly simple 'problem' but alas. Fixes: 272325c4821f ("perf: Fix mux_interval hrtimer wreckage") Fixes: 77a4d1a1b9a1 ("sched: Cleanup bandwidth timers") Cc: pjt@google.com Cc: tglx@linutronix.de Cc: klamm@yandex-team.ru Cc: mingo@kernel.org Cc: bsegall@google.com Cc: hpa@zytor.com Cc: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Link: http://lkml.kernel.org/r/20150514102311.GX21418@twins.programming.kicks-ass.net --- include/linux/perf_event.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..cf3342a8ad80 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -566,8 +566,12 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + + raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; ktime_t hrtimer_interval; + unsigned int hrtimer_active; + struct pmu *unique_pmu; struct perf_cgroup *cgrp; }; -- cgit v1.2.3 From 5f22f5c668204f3af7557018b2ad6cf2074defac Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 16 May 2015 11:44:13 +0200 Subject: irqdomain: Add non-hierarchy helper irq_domain_set_info This adds the helper irq_domain_set_info() in a non-domain hierarchy variant. This allows to use the helper for generic chip since not all chips using generic chip support domain hierarchy. Signed-off-by: Stefan Agner Cc: marc.zyngier@arm.com Cc: linux@arm.linux.org.uk Cc: u.kleine-koenig@pengutronix.de Cc: olof@lixom.net Cc: arnd@arndb.de Cc: daniel.lezcano@linaro.org Cc: mark.rutland@arm.com Cc: pawel.moll@arm.com Cc: robh+dt@kernel.org Cc: ijc+devicetree@hellion.org.uk Cc: galak@codeaurora.org Cc: mcoquelin.stm32@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: shawn.guo@linaro.org Cc: kernel@pengutronix.de Cc: jason@lakedaemon.net Link: http://lkml.kernel.org/r/1431769465-26867-2-git-send-email-stefan@agner.ch Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 676d7306a360..744ac0ec98eb 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -258,6 +258,10 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, /* V2 interfaces to support hierarchy IRQ domains. */ extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); +extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, struct irq_chip *chip, + void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, @@ -281,10 +285,6 @@ extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, irq_hw_number_t hwirq, struct irq_chip *chip, void *chip_data); -extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, - void *chip_data, irq_flow_handler_t handler, - void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, -- cgit v1.2.3 From 3cfeffc265791bc953527458e0a44ea77c459340 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sat, 16 May 2015 11:44:14 +0200 Subject: genirq: Add irq_chip_(enable/disable)_parent Add helper irq_chip_enable_parent and irq_chip_disable_parent. The helper implement the default behavior in case irq_enable or irq_disable is not implemented for the parent interrupt chip, which is calling the irq_mask or irq_unmask respectively. Signed-off-by: Stefan Agner Cc: marc.zyngier@arm.com Cc: linux@arm.linux.org.uk Cc: u.kleine-koenig@pengutronix.de Cc: olof@lixom.net Cc: arnd@arndb.de Cc: daniel.lezcano@linaro.org Cc: mark.rutland@arm.com Cc: pawel.moll@arm.com Cc: robh+dt@kernel.org Cc: ijc+devicetree@hellion.org.uk Cc: galak@codeaurora.org Cc: mcoquelin.stm32@gmail.com Cc: linux-arm-kernel@lists.infradead.org Cc: shawn.guo@linaro.org Cc: kernel@pengutronix.de Cc: jason@lakedaemon.net Link: http://lkml.kernel.org/r/1431769465-26867-3-git-send-email-stefan@agner.ch Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 62c6901cab55..2633061364b1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -458,6 +458,8 @@ extern void handle_nested_irq(unsigned int irq); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +extern void irq_chip_enable_parent(struct irq_data *data); +extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); extern int irq_chip_retrigger_hierarchy(struct irq_data *data); extern void irq_chip_mask_parent(struct irq_data *data); -- cgit v1.2.3 From b4a04ab7a37b490cad48e69abfe14288cacb669c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 15:38:40 -0400 Subject: cgroup: separate out include/linux/cgroup-defs.h From 2d728f74bfc071df06773e2fd7577dd5dab6425d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 15:37:01 -0400 This patch separates out cgroup-defs.h from cgroup.h which has grown a lot of dependencies. cgroup-defs.h currently only contains constant and type definitions and can be used to break circular include dependency. While moving, definitions are reordered so that cgroup-defs.h has consistent logical structure. This patch is pure reorganization. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 464 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/cgroup.h | 455 +------------------------------------------ 2 files changed, 466 insertions(+), 453 deletions(-) create mode 100644 include/linux/cgroup-defs.h (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h new file mode 100644 index 000000000000..55f3120fb952 --- /dev/null +++ b/include/linux/cgroup-defs.h @@ -0,0 +1,464 @@ +/* + * linux/cgroup-defs.h - basic definitions for cgroup + * + * This file provides basic type and interface. Include this file directly + * only if necessary to avoid cyclic dependencies. + */ +#ifndef _LINUX_CGROUP_DEFS_H +#define _LINUX_CGROUP_DEFS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_CGROUPS + +struct cgroup; +struct cgroup_root; +struct cgroup_subsys; +struct cgroup_taskset; +struct kernfs_node; +struct kernfs_ops; +struct kernfs_open_file; + +#define MAX_CGROUP_TYPE_NAMELEN 32 +#define MAX_CGROUP_ROOT_NAMELEN 64 +#define MAX_CFTYPE_NAME 64 + +/* define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _cgrp_id, +enum cgroup_subsys_id { +#include + CGROUP_SUBSYS_COUNT, +}; +#undef SUBSYS + +/* bits in struct cgroup_subsys_state flags field */ +enum { + CSS_NO_REF = (1 << 0), /* no reference counting for this css */ + CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ + CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ +}; + +/* bits in struct cgroup flags field */ +enum { + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, + /* + * Clone the parent's configuration when creating a new child + * cpuset cgroup. For historical reasons, this option can be + * specified at mount time and thus is implemented here. + */ + CGRP_CPUSET_CLONE_CHILDREN, +}; + +/* cgroup_root->flags */ +enum { + CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ + CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ + CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ +}; + +/* cftype->flags */ +enum { + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ + CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + + /* internal flags, do not use outside cgroup core proper */ + __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ + __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ +}; + +/* + * Per-subsystem/per-cgroup state maintained by the system. This is the + * fundamental structural building block that controllers deal with. + * + * Fields marked with "PI:" are public and immutable and may be accessed + * directly without synchronization. + */ +struct cgroup_subsys_state { + /* PI: the cgroup that this css is attached to */ + struct cgroup *cgroup; + + /* PI: the cgroup subsystem that this css is attached to */ + struct cgroup_subsys *ss; + + /* reference count - access via css_[try]get() and css_put() */ + struct percpu_ref refcnt; + + /* PI: the parent css */ + struct cgroup_subsys_state *parent; + + /* siblings list anchored at the parent's ->children */ + struct list_head sibling; + struct list_head children; + + /* + * PI: Subsys-unique ID. 0 is unused and root is always 1. The + * matching css can be looked up using css_from_id(). + */ + int id; + + unsigned int flags; + + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all csses. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr and + * used to allow interrupting and resuming iterations. + */ + u64 serial_nr; + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; + struct work_struct destroy_work; +}; + +/* + * A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire cgroup + * set for a task. + */ +struct css_set { + /* Reference count */ + atomic_t refcount; + + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; + + /* + * Lists running through all tasks using this cgroup group. + * mg_tasks lists tasks which belong to this cset but are in the + * process of being migrated out or in. Protected by + * css_set_rwsem, but, during migration, once tasks are moved to + * mg_tasks, it can be read safely while holding cgroup_mutex. + */ + struct list_head tasks; + struct list_head mg_tasks; + + /* + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. + */ + struct list_head cgrp_links; + + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; + + /* + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. + */ + struct list_head mg_preload_node; + struct list_head mg_node; + + /* + * If this cset is acting as the source of migration the following + * two fields are set. mg_src_cgrp is the source cgroup of the + * on-going migration and mg_dst_cset is the destination cset the + * target tasks on this cset should be migrated to. Protected by + * cgroup_mutex. + */ + struct cgroup *mg_src_cgrp; + struct css_set *mg_dst_cset; + + /* + * On the default hierarhcy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. + */ + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; +}; + +struct cgroup { + /* self css with NULL ->ss, points back to this cgroup */ + struct cgroup_subsys_state self; + + unsigned long flags; /* "unsigned long" so bitops work */ + + /* + * idr allocated in-hierarchy ID. + * + * ID 0 is not used, the ID of the root cgroup is always 1, and a + * new cgroup will be assigned with a smallest available ID. + * + * Allocating/Removing ID must be protected by cgroup_mutex. + */ + int id; + + /* + * If this cgroup contains any tasks, it contributes one to + * populated_cnt. All children with non-zero popuplated_cnt of + * their own contribute one. The count is zero iff there's no task + * in this cgroup or its subtree. + */ + int populated_cnt; + + struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + + /* + * The bitmask of subsystems enabled on the child cgroups. + * ->subtree_control is the one configured through + * "cgroup.subtree_control" while ->child_subsys_mask is the + * effective one which may have more subsystems enabled. + * Controller knobs are made available iff it's enabled in + * ->subtree_control. + */ + unsigned int subtree_control; + unsigned int child_subsys_mask; + + /* Private pointers for each registered subsystem */ + struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + + struct cgroup_root *root; + + /* + * List of cgrp_cset_links pointing at css_sets with tasks in this + * cgroup. Protected by css_set_lock. + */ + struct list_head cset_links; + + /* + * On the default hierarchy, a css_set for a cgroup with some + * susbsys disabled will point to css's which are associated with + * the closest ancestor which has the subsys enabled. The + * following lists all css_sets which point to this cgroup's css + * for the given subsystem. + */ + struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; + + /* used to wait for offlining of csses */ + wait_queue_head_t offline_waitq; + + /* used to schedule release agent */ + struct work_struct release_agent_work; +}; + +/* + * A cgroup_root represents the root of a cgroup hierarchy, and may be + * associated with a kernfs_root to form an active hierarchy. This is + * internal to cgroup core. Don't access directly from controllers. + */ +struct cgroup_root { + struct kernfs_root *kf_root; + + /* The bitmask of subsystems attached to this hierarchy */ + unsigned int subsys_mask; + + /* Unique id for this hierarchy. */ + int hierarchy_id; + + /* The root cgroup. Root is destroyed on its release. */ + struct cgroup cgrp; + + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; + + /* A list running through the active hierarchies */ + struct list_head root_list; + + /* Hierarchy-specific flags */ + unsigned int flags; + + /* IDs for cgroups in this hierarchy */ + struct idr cgroup_idr; + + /* The path to use for release notifications. */ + char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; +}; + +/* + * struct cftype: handler definitions for cgroup control files + * + * When reading/writing to a file: + * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_path.dentry->d_fsdata + */ +struct cftype { + /* + * By convention, the name should begin with the name of the + * subsystem, followed by a period. Zero length string indicates + * end of cftype array. + */ + char name[MAX_CFTYPE_NAME]; + int private; + /* + * If not 0, file mode is set to this value, otherwise it will + * be figured out automatically + */ + umode_t mode; + + /* + * The maximum length of string, excluding trailing nul, that can + * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. + */ + size_t max_write_len; + + /* CFTYPE_* flags */ + unsigned int flags; + + /* + * Fields used for internal bookkeeping. Initialized automatically + * during registration. + */ + struct cgroup_subsys *ss; /* NULL for cgroup core files */ + struct list_head node; /* anchored at ss->cfts */ + struct kernfs_ops *kf_ops; + + /* + * read_u64() is a shortcut for the common case of returning a + * single integer. Use it in place of read() + */ + u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); + + /* generic seq_file read interface */ + int (*seq_show)(struct seq_file *sf, void *v); + + /* optional ops, implement all or none */ + void *(*seq_start)(struct seq_file *sf, loff_t *ppos); + void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); + void (*seq_stop)(struct seq_file *sf, void *v); + + /* + * write_u64() is a shortcut for the common case of accepting + * a single integer (as parsed by simple_strtoull) from + * userspace. Use in place of write(); return 0 or error. + */ + int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val); + + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. Use + * of_css/cft() to access the associated css and cft. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lockdep_key; +#endif +}; + +/* + * Control Group subsystem type. + * See Documentation/cgroups/cgroups.txt for details + */ +struct cgroup_subsys { + struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); + int (*css_online)(struct cgroup_subsys_state *css); + void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_released)(struct cgroup_subsys_state *css); + void (*css_free)(struct cgroup_subsys_state *css); + void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_e_css_changed)(struct cgroup_subsys_state *css); + + int (*can_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*attach)(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, + struct task_struct *task); + void (*bind)(struct cgroup_subsys_state *root_css); + + int disabled; + int early_init; + + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now disallowed to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. + */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + + /* the following two fields are initialized automtically during boot */ + int id; + const char *name; + + /* link to parent, protected by cgroup_lock() */ + struct cgroup_root *root; + + /* idr for css->id */ + struct idr css_idr; + + /* + * List of cftypes. Each entry is the first entry of an array + * terminated by zero length name. + */ + struct list_head cfts; + + /* + * Base cftypes which are automatically registered. The two can + * point to the same array. + */ + struct cftype *dfl_cftypes; /* for the default hierarchy */ + struct cftype *legacy_cftypes; /* for the legacy hierarchies */ + + /* + * A subsystem may depend on other subsystems. When such subsystem + * is enabled on a cgroup, the depended-upon subsystems are enabled + * together if available. Subsystems enabled due to dependency are + * not visible to userland until explicitly enabled. The following + * specifies the mask of subsystems that this one depends on. + */ + unsigned int depends_on; +}; + +#endif /* CONFIG_CGROUPS */ +#endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b9cb94c3102a..96a2ecd5aa69 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,23 +11,16 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include #include #include -#include -#ifdef CONFIG_CGROUPS +#include -struct cgroup_root; -struct cgroup_subsys; -struct cgroup; +#ifdef CONFIG_CGROUPS extern int cgroup_init_early(void); extern int cgroup_init(void); @@ -40,66 +33,6 @@ extern int cgroupstats_build(struct cgroupstats *stats, extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); -/* define the enumeration of all cgroup subsystems */ -#define SUBSYS(_x) _x ## _cgrp_id, -enum cgroup_subsys_id { -#include - CGROUP_SUBSYS_COUNT, -}; -#undef SUBSYS - -/* - * Per-subsystem/per-cgroup state maintained by the system. This is the - * fundamental structural building block that controllers deal with. - * - * Fields marked with "PI:" are public and immutable and may be accessed - * directly without synchronization. - */ -struct cgroup_subsys_state { - /* PI: the cgroup that this css is attached to */ - struct cgroup *cgroup; - - /* PI: the cgroup subsystem that this css is attached to */ - struct cgroup_subsys *ss; - - /* reference count - access via css_[try]get() and css_put() */ - struct percpu_ref refcnt; - - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - - /* siblings list anchored at the parent's ->children */ - struct list_head sibling; - struct list_head children; - - /* - * PI: Subsys-unique ID. 0 is unused and root is always 1. The - * matching css can be looked up using css_from_id(). - */ - int id; - - unsigned int flags; - - /* - * Monotonically increasing unique serial number which defines a - * uniform order among all csses. It's guaranteed that all - * ->children lists are in the ascending order of ->serial_nr and - * used to allow interrupting and resuming iterations. - */ - u64 serial_nr; - - /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; - struct work_struct destroy_work; -}; - -/* bits in struct cgroup_subsys_state flags field */ -enum { - CSS_NO_REF = (1 << 0), /* no reference counting for this css */ - CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ - CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ -}; - /** * css_get - obtain a reference on the specified css * @css: target css @@ -185,307 +118,6 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } -/* bits in struct cgroup flags field */ -enum { - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, - /* - * Clone the parent's configuration when creating a new child - * cpuset cgroup. For historical reasons, this option can be - * specified at mount time and thus is implemented here. - */ - CGRP_CPUSET_CLONE_CHILDREN, -}; - -struct cgroup { - /* self css with NULL ->ss, points back to this cgroup */ - struct cgroup_subsys_state self; - - unsigned long flags; /* "unsigned long" so bitops work */ - - /* - * idr allocated in-hierarchy ID. - * - * ID 0 is not used, the ID of the root cgroup is always 1, and a - * new cgroup will be assigned with a smallest available ID. - * - * Allocating/Removing ID must be protected by cgroup_mutex. - */ - int id; - - /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. - */ - int populated_cnt; - - struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ - - /* - * The bitmask of subsystems enabled on the child cgroups. - * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. - */ - unsigned int subtree_control; - unsigned int child_subsys_mask; - - /* Private pointers for each registered subsystem */ - struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; - - struct cgroup_root *root; - - /* - * List of cgrp_cset_links pointing at css_sets with tasks in this - * cgroup. Protected by css_set_lock. - */ - struct list_head cset_links; - - /* - * On the default hierarchy, a css_set for a cgroup with some - * susbsys disabled will point to css's which are associated with - * the closest ancestor which has the subsys enabled. The - * following lists all css_sets which point to this cgroup's css - * for the given subsystem. - */ - struct list_head e_csets[CGROUP_SUBSYS_COUNT]; - - /* - * list of pidlists, up to two for each namespace (one for procs, one - * for tasks); created on demand. - */ - struct list_head pidlists; - struct mutex pidlist_mutex; - - /* used to wait for offlining of csses */ - wait_queue_head_t offline_waitq; - - /* used to schedule release agent */ - struct work_struct release_agent_work; -}; - -#define MAX_CGROUP_ROOT_NAMELEN 64 - -/* cgroup_root->flags */ -enum { - CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), /* __DEVEL__sane_behavior specified */ - CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ - CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ -}; - -/* - * A cgroup_root represents the root of a cgroup hierarchy, and may be - * associated with a kernfs_root to form an active hierarchy. This is - * internal to cgroup core. Don't access directly from controllers. - */ -struct cgroup_root { - struct kernfs_root *kf_root; - - /* The bitmask of subsystems attached to this hierarchy */ - unsigned int subsys_mask; - - /* Unique id for this hierarchy. */ - int hierarchy_id; - - /* The root cgroup. Root is destroyed on its release. */ - struct cgroup cgrp; - - /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ - atomic_t nr_cgrps; - - /* A list running through the active hierarchies */ - struct list_head root_list; - - /* Hierarchy-specific flags */ - unsigned int flags; - - /* IDs for cgroups in this hierarchy */ - struct idr cgroup_idr; - - /* The path to use for release notifications. */ - char release_agent_path[PATH_MAX]; - - /* The name for this hierarchy - may be empty */ - char name[MAX_CGROUP_ROOT_NAMELEN]; -}; - -/* - * A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. This saves space in the task struct - * object and speeds up fork()/exit(), since a single inc/dec and a - * list_add()/del() can bump the reference count on the entire cgroup - * set for a task. - */ - -struct css_set { - - /* Reference count */ - atomic_t refcount; - - /* - * List running through all cgroup groups in the same hash - * slot. Protected by css_set_lock - */ - struct hlist_node hlist; - - /* - * Lists running through all tasks using this cgroup group. - * mg_tasks lists tasks which belong to this cset but are in the - * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to - * mg_tasks, it can be read safely while holding cgroup_mutex. - */ - struct list_head tasks; - struct list_head mg_tasks; - - /* - * List of cgrp_cset_links pointing at cgroups referenced from this - * css_set. Protected by css_set_lock. - */ - struct list_head cgrp_links; - - /* the default cgroup associated with this css_set */ - struct cgroup *dfl_cgrp; - - /* - * Set of subsystem states, one for each subsystem. This array is - * immutable after creation apart from the init_css_set during - * subsystem registration (at boot time). - */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - - /* - * List of csets participating in the on-going migration either as - * source or destination. Protected by cgroup_mutex. - */ - struct list_head mg_preload_node; - struct list_head mg_node; - - /* - * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. - */ - struct cgroup *mg_src_cgrp; - struct css_set *mg_dst_cset; - - /* - * On the default hierarhcy, ->subsys[ssid] may point to a css - * attached to an ancestor instead of the cgroup this css_set is - * associated with. The following node is anchored at - * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to - * iterate through all css's attached to a given cgroup. - */ - struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - - /* For RCU-protected deletion */ - struct rcu_head rcu_head; -}; - -/* - * struct cftype: handler definitions for cgroup control files - * - * When reading/writing to a file: - * - the cgroup to use is file->f_path.dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_path.dentry->d_fsdata - */ - -/* cftype->flags */ -enum { - CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ - CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ - CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ - - /* internal flags, do not use outside cgroup core proper */ - __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ - __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ -}; - -#define MAX_CFTYPE_NAME 64 - -struct cftype { - /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. - */ - char name[MAX_CFTYPE_NAME]; - int private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; - - /* - * The maximum length of string, excluding trailing nul, that can - * be passed to write. If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed. - */ - size_t max_write_len; - - /* CFTYPE_* flags */ - unsigned int flags; - - /* - * Fields used for internal bookkeeping. Initialized automatically - * during registration. - */ - struct cgroup_subsys *ss; /* NULL for cgroup core files */ - struct list_head node; /* anchored at ss->cfts */ - struct kernfs_ops *kf_ops; - - /* - * read_u64() is a shortcut for the common case of returning a - * single integer. Use it in place of read() - */ - u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); - /* - * read_s64() is a signed version of read_u64() - */ - s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); - - /* generic seq_file read interface */ - int (*seq_show)(struct seq_file *sf, void *v); - - /* optional ops, implement all or none */ - void *(*seq_start)(struct seq_file *sf, loff_t *ppos); - void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); - void (*seq_stop)(struct seq_file *sf, void *v); - - /* - * write_u64() is a shortcut for the common case of accepting - * a single integer (as parsed by simple_strtoull) from - * userspace. Use in place of write(); return 0 or error. - */ - int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, - u64 val); - /* - * write_s64() is a signed version of write_u64() - */ - int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, - s64 val); - - /* - * write() is the generic write callback which maps directly to - * kernfs write operation and overrides all other operations. - * Maximum write size is determined by ->max_write_len. Use - * of_css/cft() to access the associated css and cft. - */ - ssize_t (*write)(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lockdep_key; -#endif -}; - extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@ -612,11 +244,6 @@ int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -/* - * Control Group taskset, used to pass around set of tasks to cgroup_subsys - * methods. - */ -struct cgroup_taskset; struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); @@ -629,84 +256,6 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) -/* - * Control Group subsystem type. - * See Documentation/cgroups/cgroups.txt for details - */ - -struct cgroup_subsys { - struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); - int (*css_online)(struct cgroup_subsys_state *css); - void (*css_offline)(struct cgroup_subsys_state *css); - void (*css_released)(struct cgroup_subsys_state *css); - void (*css_free)(struct cgroup_subsys_state *css); - void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); - - int (*can_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); - void (*bind)(struct cgroup_subsys_state *root_css); - - int disabled; - int early_init; - - /* - * If %false, this subsystem is properly hierarchical - - * configuration, resource accounting and restriction on a parent - * cgroup cover those of its children. If %true, hierarchy support - * is broken in some ways - some subsystems ignore hierarchy - * completely while others are only implemented half-way. - * - * It's now disallowed to create nested cgroups if the subsystem is - * broken and cgroup core will emit a warning message on such - * cases. Eventually, all subsystems will be made properly - * hierarchical and this will go away. - */ - bool broken_hierarchy; - bool warned_broken_hierarchy; - - /* the following two fields are initialized automtically during boot */ - int id; -#define MAX_CGROUP_TYPE_NAMELEN 32 - const char *name; - - /* link to parent, protected by cgroup_lock() */ - struct cgroup_root *root; - - /* idr for css->id */ - struct idr css_idr; - - /* - * List of cftypes. Each entry is the first entry of an array - * terminated by zero length name. - */ - struct list_head cfts; - - /* - * Base cftypes which are automatically registered. The two can - * point to the same array. - */ - struct cftype *dfl_cftypes; /* for the default hierarchy */ - struct cftype *legacy_cftypes; /* for the legacy hierarchies */ - - /* - * A subsystem may depend on other subsystems. When such subsystem - * is enabled on a cgroup, the depended-upon subsystems are enabled - * together if available. Subsystems enabled due to dependency are - * not visible to userland until explicitly enabled. The following - * specifies the mask of subsystems that this one depends on. - */ - unsigned int depends_on; -}; - #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include #undef SUBSYS -- cgit v1.2.3 From c326aa2bb2209e10df4a381801bb34ca0f923038 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 16:24:16 -0400 Subject: cgroup: reorganize include/linux/cgroup.h From c4d440938b5e2015c70594fe6666a099c844f929 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 16:21:40 -0400 Over time, cgroup.h grew organically and doesn't have much logical structure at this point. Separation of cgroup-defs.h in the previous patch gives us a good chance for reorganizing cgroup.h as changes to the header are likely to cause conflicts anyway. This patch reorganizes cgroup.h so that it has consistent logical grouping. This is pure reorganization. v2: Relocating #ifdef CONFIG_CGROUPS caused build failure when cgroup is disabled. Dropped. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 552 ++++++++++++++++++++++++------------------------- 1 file changed, 271 insertions(+), 281 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 96a2ecd5aa69..82319fb31cfe 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,16 +22,189 @@ #ifdef CONFIG_CGROUPS -extern int cgroup_init_early(void); -extern int cgroup_init(void); -extern void cgroup_fork(struct task_struct *p); -extern void cgroup_post_fork(struct task_struct *p); -extern void cgroup_exit(struct task_struct *p); -extern int cgroupstats_build(struct cgroupstats *stats, - struct dentry *dentry); +/* a css_task_iter should be treated as an opaque object */ +struct css_task_iter { + struct cgroup_subsys *ss; + + struct list_head *cset_pos; + struct list_head *cset_head; + + struct list_head *task_pos; + struct list_head *tasks_head; + struct list_head *mg_tasks_head; +}; + +extern struct cgroup_root cgrp_dfl_root; +extern struct css_set init_css_set; + +#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; +#include +#undef SUBSYS + +bool css_has_online_children(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); +struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); + +bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); + +int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cftype *cfts); + +char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); +int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *tsk); + +void cgroup_fork(struct task_struct *p); +void cgroup_post_fork(struct task_struct *p); +void cgroup_exit(struct task_struct *p); + +int cgroup_init_early(void); +int cgroup_init(void); + +/* + * Iteration helpers and macros. + */ + +struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *parent); +struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); +struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); +struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *css); + +struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); +struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); + +void css_task_iter_start(struct cgroup_subsys_state *css, + struct css_task_iter *it); +struct task_struct *css_task_iter_next(struct css_task_iter *it); +void css_task_iter_end(struct css_task_iter *it); + +/** + * css_for_each_child - iterate through children of a css + * @pos: the css * to use as the loop cursor + * @parent: css whose children to walk + * + * Walk @parent's children. Must be called under rcu_read_lock(). + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. + */ +#define css_for_each_child(pos, parent) \ + for ((pos) = css_next_child(NULL, (parent)); (pos); \ + (pos) = css_next_child((pos), (parent))) + +/** + * css_for_each_descendant_pre - pre-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @root: css whose descendants to walk + * + * Walk @root's descendants. @root is included in the iteration and the + * first node to be visited. Must be called under rcu_read_lock(). + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * For example, the following guarantees that a descendant can't escape + * state updates of its ancestors. + * + * my_online(@css) + * { + * Lock @css's parent and @css; + * Inherit state from the parent; + * Unlock both. + * } + * + * my_update_state(@css) + * { + * css_for_each_descendant_pre(@pos, @css) { + * Lock @pos; + * if (@pos == @css) + * Update @css's state; + * else + * Verify @pos is alive and inherit state from its parent; + * Unlock @pos; + * } + * } + * + * As long as the inheriting step, including checking the parent state, is + * enclosed inside @pos locking, double-locking the parent isn't necessary + * while inheriting. The state update to the parent is guaranteed to be + * visible by walking order and, as long as inheriting operations to the + * same @pos are atomic to each other, multiple updates racing each other + * still result in the correct state. It's guaranateed that at least one + * inheritance happens for any css after the latest update to its parent. + * + * If checking parent's state requires locking the parent, each inheriting + * iteration should lock and unlock both @pos->parent and @pos. + * + * Alternatively, a subsystem may choose to use a single global lock to + * synchronize ->css_online() and ->css_offline() against tree-walking + * operations. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. + */ +#define css_for_each_descendant_pre(pos, css) \ + for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ + (pos) = css_next_descendant_pre((pos), (css))) + +/** + * css_for_each_descendant_post - post-order walk of a css's descendants + * @pos: the css * to use as the loop cursor + * @css: css whose descendants to walk + * + * Similar to css_for_each_descendant_pre() but performs post-order + * traversal instead. @root is included in the iteration and the last + * node to be visited. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * Note that the walk visibility guarantee example described in pre-order + * walk doesn't apply the same to post-order walks. + */ +#define css_for_each_descendant_post(pos, css) \ + for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ + (pos) = css_next_descendant_post((pos), (css))) + +/** + * cgroup_taskset_for_each - iterate cgroup_taskset + * @task: the loop cursor + * @tset: taskset to iterate + */ +#define cgroup_taskset_for_each(task, tset) \ + for ((task) = cgroup_taskset_first((tset)); (task); \ + (task) = cgroup_taskset_next((tset))) -extern int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *tsk); +/* + * Inline functions. + */ /** * css_get - obtain a reference on the specified css @@ -118,8 +291,87 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } -extern struct cgroup_root cgrp_dfl_root; -extern struct css_set init_css_set; +/** + * task_css_set_check - obtain a task's css_set with extra access conditions + * @task: the task to obtain css_set for + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * A task's css_set is RCU protected, initialized and exited while holding + * task_lock(), and can only be modified while holding both cgroup_mutex + * and task_lock() while the task is alive. This macro verifies that the + * caller is inside proper critical section and returns @task's css_set. + * + * The caller can also specify additional allowed conditions via @__c, such + * as locks used during the cgroup_subsys::attach() methods. + */ +#ifdef CONFIG_PROVE_RCU +extern struct mutex cgroup_mutex; +extern struct rw_semaphore css_set_rwsem; +#define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ + lockdep_is_held(&cgroup_mutex) || \ + lockdep_is_held(&css_set_rwsem) || \ + ((task)->flags & PF_EXITING) || (__c)) +#else +#define task_css_set_check(task, __c) \ + rcu_dereference((task)->cgroups) +#endif + +/** + * task_css_check - obtain css for (task, subsys) w/ extra access conds + * @task: the target task + * @subsys_id: the target subsystem ID + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The + * synchronization rules are the same as task_css_set_check(). + */ +#define task_css_check(task, subsys_id, __c) \ + task_css_set_check((task), (__c))->subsys[(subsys_id)] + +/** + * task_css_set - obtain a task's css_set + * @task: the task to obtain css_set for + * + * See task_css_set_check(). + */ +static inline struct css_set *task_css_set(struct task_struct *task) +{ + return task_css_set_check(task, false); +} + +/** + * task_css - obtain css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * See task_css_check(). + */ +static inline struct cgroup_subsys_state *task_css(struct task_struct *task, + int subsys_id) +{ + return task_css_check(task, subsys_id, false); +} + +/** + * task_css_is_root - test whether a task belongs to the root css + * @task: the target task + * @subsys_id: the target subsystem ID + * + * Test whether @task belongs to the root css on the specified subsystem. + * May be invoked in any context. + */ +static inline bool task_css_is_root(struct task_struct *task, int subsys_id) +{ + return task_css_check(task, subsys_id, true) == + init_css_set.subsys[subsys_id]; +} + +static inline struct cgroup *task_cgroup(struct task_struct *task, + int subsys_id) +{ + return task_css(task, subsys_id)->cgroup; +} /** * cgroup_on_dfl - test whether a cgroup is on the default hierarchy @@ -236,284 +488,22 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); - -int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_rm_cftypes(struct cftype *cfts); - -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); - -struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); -struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); - -/** - * cgroup_taskset_for_each - iterate cgroup_taskset - * @task: the loop cursor - * @tset: taskset to iterate - */ -#define cgroup_taskset_for_each(task, tset) \ - for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) - -#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; -#include -#undef SUBSYS - -/** - * task_css_set_check - obtain a task's css_set with extra access conditions - * @task: the task to obtain css_set for - * @__c: extra condition expression to be passed to rcu_dereference_check() - * - * A task's css_set is RCU protected, initialized and exited while holding - * task_lock(), and can only be modified while holding both cgroup_mutex - * and task_lock() while the task is alive. This macro verifies that the - * caller is inside proper critical section and returns @task's css_set. - * - * The caller can also specify additional allowed conditions via @__c, such - * as locks used during the cgroup_subsys::attach() methods. - */ -#ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; -extern struct rw_semaphore css_set_rwsem; -#define task_css_set_check(task, __c) \ - rcu_dereference_check((task)->cgroups, \ - lockdep_is_held(&cgroup_mutex) || \ - lockdep_is_held(&css_set_rwsem) || \ - ((task)->flags & PF_EXITING) || (__c)) -#else -#define task_css_set_check(task, __c) \ - rcu_dereference((task)->cgroups) -#endif - -/** - * task_css_check - obtain css for (task, subsys) w/ extra access conds - * @task: the target task - * @subsys_id: the target subsystem ID - * @__c: extra condition expression to be passed to rcu_dereference_check() - * - * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The - * synchronization rules are the same as task_css_set_check(). - */ -#define task_css_check(task, subsys_id, __c) \ - task_css_set_check((task), (__c))->subsys[(subsys_id)] - -/** - * task_css_set - obtain a task's css_set - * @task: the task to obtain css_set for - * - * See task_css_set_check(). - */ -static inline struct css_set *task_css_set(struct task_struct *task) -{ - return task_css_set_check(task, false); -} - -/** - * task_css - obtain css for (task, subsys) - * @task: the target task - * @subsys_id: the target subsystem ID - * - * See task_css_check(). - */ -static inline struct cgroup_subsys_state *task_css(struct task_struct *task, - int subsys_id) -{ - return task_css_check(task, subsys_id, false); -} - -/** - * task_css_is_root - test whether a task belongs to the root css - * @task: the target task - * @subsys_id: the target subsystem ID - * - * Test whether @task belongs to the root css on the specified subsystem. - * May be invoked in any context. - */ -static inline bool task_css_is_root(struct task_struct *task, int subsys_id) -{ - return task_css_check(task, subsys_id, true) == - init_css_set.subsys[subsys_id]; -} - -static inline struct cgroup *task_cgroup(struct task_struct *task, - int subsys_id) -{ - return task_css(task, subsys_id)->cgroup; -} - -struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *parent); - -struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); - -/** - * css_for_each_child - iterate through children of a css - * @pos: the css * to use as the loop cursor - * @parent: css whose children to walk - * - * Walk @parent's children. Must be called under rcu_read_lock(). - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * It is allowed to temporarily drop RCU read lock during iteration. The - * caller is responsible for ensuring that @pos remains accessible until - * the start of the next iteration by, for example, bumping the css refcnt. - */ -#define css_for_each_child(pos, parent) \ - for ((pos) = css_next_child(NULL, (parent)); (pos); \ - (pos) = css_next_child((pos), (parent))) - -struct cgroup_subsys_state * -css_next_descendant_pre(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *css); - -struct cgroup_subsys_state * -css_rightmost_descendant(struct cgroup_subsys_state *pos); - -/** - * css_for_each_descendant_pre - pre-order walk of a css's descendants - * @pos: the css * to use as the loop cursor - * @root: css whose descendants to walk - * - * Walk @root's descendants. @root is included in the iteration and the - * first node to be visited. Must be called under rcu_read_lock(). - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * For example, the following guarantees that a descendant can't escape - * state updates of its ancestors. - * - * my_online(@css) - * { - * Lock @css's parent and @css; - * Inherit state from the parent; - * Unlock both. - * } - * - * my_update_state(@css) - * { - * css_for_each_descendant_pre(@pos, @css) { - * Lock @pos; - * if (@pos == @css) - * Update @css's state; - * else - * Verify @pos is alive and inherit state from its parent; - * Unlock @pos; - * } - * } - * - * As long as the inheriting step, including checking the parent state, is - * enclosed inside @pos locking, double-locking the parent isn't necessary - * while inheriting. The state update to the parent is guaranteed to be - * visible by walking order and, as long as inheriting operations to the - * same @pos are atomic to each other, multiple updates racing each other - * still result in the correct state. It's guaranateed that at least one - * inheritance happens for any css after the latest update to its parent. - * - * If checking parent's state requires locking the parent, each inheriting - * iteration should lock and unlock both @pos->parent and @pos. - * - * Alternatively, a subsystem may choose to use a single global lock to - * synchronize ->css_online() and ->css_offline() against tree-walking - * operations. - * - * It is allowed to temporarily drop RCU read lock during iteration. The - * caller is responsible for ensuring that @pos remains accessible until - * the start of the next iteration by, for example, bumping the css refcnt. - */ -#define css_for_each_descendant_pre(pos, css) \ - for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ - (pos) = css_next_descendant_pre((pos), (css))) - -struct cgroup_subsys_state * -css_next_descendant_post(struct cgroup_subsys_state *pos, - struct cgroup_subsys_state *css); - -/** - * css_for_each_descendant_post - post-order walk of a css's descendants - * @pos: the css * to use as the loop cursor - * @css: css whose descendants to walk - * - * Similar to css_for_each_descendant_pre() but performs post-order - * traversal instead. @root is included in the iteration and the last - * node to be visited. - * - * If a subsystem synchronizes ->css_online() and the start of iteration, a - * css which finished ->css_online() is guaranteed to be visible in the - * future iterations and will stay visible until the last reference is put. - * A css which hasn't finished ->css_online() or already finished - * ->css_offline() may show up during traversal. It's each subsystem's - * responsibility to synchronize against on/offlining. - * - * Note that the walk visibility guarantee example described in pre-order - * walk doesn't apply the same to post-order walks. - */ -#define css_for_each_descendant_post(pos, css) \ - for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ - (pos) = css_next_descendant_post((pos), (css))) - -bool css_has_online_children(struct cgroup_subsys_state *css); - -/* A css_task_iter should be treated as an opaque object */ -struct css_task_iter { - struct cgroup_subsys *ss; - - struct list_head *cset_pos; - struct list_head *cset_head; - - struct list_head *task_pos; - struct list_head *tasks_head; - struct list_head *mg_tasks_head; -}; - -void css_task_iter_start(struct cgroup_subsys_state *css, - struct css_task_iter *it); -struct task_struct *css_task_iter_next(struct css_task_iter *it); -void css_task_iter_end(struct css_task_iter *it); - -int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); -int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); - -struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, - struct cgroup_subsys *ss); -struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss); - #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -static inline int cgroup_init_early(void) { return 0; } -static inline int cgroup_init(void) { return 0; } +static inline void css_put(struct cgroup_subsys_state *css) {} +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) { return 0; } +static inline int cgroupstats_build(struct cgroupstats *stats, + struct dentry *dentry) { return -EINVAL; } + static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} -static inline int cgroupstats_build(struct cgroupstats *stats, - struct dentry *dentry) -{ - return -EINVAL; -} - -static inline void css_put(struct cgroup_subsys_state *css) {} - -/* No cgroups - nothing to do */ -static inline int cgroup_attach_task_all(struct task_struct *from, - struct task_struct *t) -{ - return 0; -} +static inline int cgroup_init_early(void) { return 0; } +static inline int cgroup_init(void) { return 0; } #endif /* !CONFIG_CGROUPS */ -- cgit v1.2.3 From 87e9b9f1d86c2ee9a10c2a4186a72d0af4cc963e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 16 May 2015 01:38:15 +0200 Subject: PM / sleep: Make suspend-to-idle-specific code depend on CONFIG_SUSPEND Since idle_should_freeze() is defined to always return 'false' for CONFIG_SUSPEND unset, all of the code depending on it in cpuidle_idle_call() is not necessary in that case. Make that code depend on CONFIG_SUSPEND too to avoid building it when it is not going to be used. Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner --- include/linux/cpuidle.h | 16 ++++++++++------ include/linux/tick.h | 12 ++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 9c5e89254796..13ee266ca98c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -151,10 +151,6 @@ extern void cpuidle_resume(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); -extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev); -extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, - struct cpuidle_device *dev); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); #else @@ -190,14 +186,22 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } +static inline struct cpuidle_driver *cpuidle_get_cpu_driver( + struct cpuidle_device *dev) {return NULL; } +#endif + +#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) +extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +#else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } -static inline struct cpuidle_driver *cpuidle_get_cpu_driver( - struct cpuidle_device *dev) {return NULL; } #endif #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/include/linux/tick.h b/include/linux/tick.h index f8492da57ad3..ec6e8bc992bf 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -13,8 +13,6 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); -extern void tick_freeze(void); -extern void tick_unfreeze(void); /* Should be core only, but ARM BL switcher requires it */ extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ @@ -23,14 +21,20 @@ extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } -static inline void tick_freeze(void) { } -static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } static inline void tick_handover_do_timer(void) { } static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) +extern void tick_freeze(void); +extern void tick_unfreeze(void); +#else +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } +#endif + #ifdef CONFIG_TICK_ONESHOT extern void tick_irq_enter(void); # ifndef arch_needs_cpu -- cgit v1.2.3 From ab3f02fc237211f0583c1e7ba3bf504747be9b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:52:27 +0200 Subject: locking/arch: Add WRITE_ONCE() to set_mb() Since we assume set_mb() to result in a single store followed by a full memory barrier, employ WRITE_ONCE(). Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a7c0941d10da..03e227ba481c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #define WRITE_ONCE(x, val) \ - ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #endif /* __KERNEL__ */ -- cgit v1.2.3 From b92b8b35a2e38bde319fd1d68ec84628c1f1b0fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:51:55 +0200 Subject: locking/arch: Rename set_mb() to smp_store_mb() Since set_mb() is really about an smp_mb() -- not a IO/DMA barrier like mb() rename it to match the recent smp_load_acquire() and smp_store_release(). Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..18f197223ebd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - set_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) /* @@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - set_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else @@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) + smp_store_mb((tsk)->state, (state_value)) /* * set_current_state() includes a barrier so that the write of current->state @@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) #define set_current_state(state_value) \ - set_mb(current->state, (state_value)) + smp_store_mb(current->state, (state_value)) #endif -- cgit v1.2.3 From 92cf211874e954027b8e91cc9a15485a50b58d6b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:46 +0200 Subject: sched/preempt: Merge preempt_mask.h into preempt.h preempt_mask.h defines all the preempt_count semantics and related symbols: preempt, softirq, hardirq, nmi, preempt active, need resched, etc... preempt.h defines the accessors and mutators of preempt_count. But there is a messy dependency game around those two header files: * preempt_mask.h includes preempt.h in order to access preempt_count() * preempt_mask.h defines all preempt_count semantic and symbols except PREEMPT_NEED_RESCHED that is needed by asm/preempt.h Thus we need to define it from preempt.h, right before including asm/preempt.h, instead of defining it to preempt_mask.h with the other preempt_count symbols. Therefore the preempt_count semantics happen to be spread out. * We plan to introduce preempt_active_[enter,exit]() to consolidate preempt_schedule*() code. But we'll need to access both preempt_count mutators (preempt_count_add()) and preempt_count symbols (PREEMPT_ACTIVE, PREEMPT_OFFSET). The usual place to define preempt operations is in preempt.h but then we'll need symbols in preempt_mask.h which already includes preempt.h. So we end up with a ressource circle dependency. Lets merge preempt_mask.h into preempt.h to solve these dependency issues. This way we gather semantic symbols and operation definition of preempt_count in a single file. This is a dumb copy-paste merge. Further merge re-arrangments are performed in a subsequent patch to ease review. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/bottom_half.h | 1 - include/linux/hardirq.h | 2 +- include/linux/preempt.h | 111 ++++++++++++++++++++++++++++++++++++++++ include/linux/preempt_mask.h | 117 ------------------------------------------- include/linux/sched.h | 2 +- 5 files changed, 113 insertions(+), 120 deletions(-) delete mode 100644 include/linux/preempt_mask.h (limited to 'include/linux') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 86c12c93e3cf..8fdcb783197d 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,7 +2,6 @@ #define _LINUX_BH_H #include -#include #ifdef CONFIG_TRACE_IRQFLAGS extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index f4af03404b97..dfd59d6bc6f0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -1,7 +1,7 @@ #ifndef LINUX_HARDIRQ_H #define LINUX_HARDIRQ_H -#include +#include #include #include #include diff --git a/include/linux/preempt.h b/include/linux/preempt.h index de83b4eb1642..8cc0338a5e9a 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -17,6 +17,117 @@ #include +/* + * We put the hardirq and softirq counter into the preemption + * counter. The bitmask has the following meaning: + * + * - bits 0-7 are the preemption count (max preemption depth: 256) + * - bits 8-15 are the softirq count (max # of softirqs: 256) + * + * The hardirq count could in theory be the same as the number of + * interrupts in the system, but we run all interrupt handlers with + * interrupts disabled, so we cannot have nesting interrupts. Though + * there are a few palaeontologic drivers which reenable interrupts in + * the handler, so we need more than one bit here. + * + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 + * HARDIRQ_MASK: 0x000f0000 + * NMI_MASK: 0x00100000 + * PREEMPT_ACTIVE: 0x00200000 + */ +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define HARDIRQ_BITS 4 +#define NMI_BITS 1 + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) +#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) +#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) + +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) + +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) + +#define hardirq_count() (preempt_count() & HARDIRQ_MASK) +#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ + | NMI_MASK)) + +/* + * Are we doing bottom half or hardware interrupt processing? + * Are we in a softirq context? Interrupt context? + * in_softirq - Are we currently processing softirq or have bh disabled? + * in_serving_softirq - Are we currently processing softirq? + */ +#define in_irq() (hardirq_count()) +#define in_softirq() (softirq_count()) +#define in_interrupt() (irq_count()) +#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) + +/* + * Are we in NMI context? + */ +#define in_nmi() (preempt_count() & NMI_MASK) + +#if defined(CONFIG_PREEMPT_COUNT) +# define PREEMPT_CHECK_OFFSET 1 +#else +# define PREEMPT_CHECK_OFFSET 0 +#endif + +/* + * The preempt_count offset needed for things like: + * + * spin_lock_bh() + * + * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and + * softirqs, such that unlock sequences of: + * + * spin_unlock(); + * local_bh_enable(); + * + * Work as expected. + */ +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) + +/* + * Are we running in atomic context? WARNING: this macro cannot + * always detect atomic context; in particular, it cannot know about + * held spinlocks in non-preemptible kernels. Thus it should not be + * used in the general case to determine whether sleeping is possible. + * Do not use in_atomic() in driver code. + */ +#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) + +/* + * Check whether we were atomic before we did preempt_disable(): + * (used by the scheduler, *after* releasing the kernel lock) + */ +#define in_atomic_preempt_off() \ + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) + +#ifdef CONFIG_PREEMPT_COUNT +# define preemptible() (preempt_count() == 0 && !irqs_disabled()) +#else +# define preemptible() 0 +#endif + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h deleted file mode 100644 index dbeec4d4a3be..000000000000 --- a/include/linux/preempt_mask.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef LINUX_PREEMPT_MASK_H -#define LINUX_PREEMPT_MASK_H - -#include - -/* - * We put the hardirq and softirq counter into the preemption - * counter. The bitmask has the following meaning: - * - * - bits 0-7 are the preemption count (max preemption depth: 256) - * - bits 8-15 are the softirq count (max # of softirqs: 256) - * - * The hardirq count could in theory be the same as the number of - * interrupts in the system, but we run all interrupt handlers with - * interrupts disabled, so we cannot have nesting interrupts. Though - * there are a few palaeontologic drivers which reenable interrupts in - * the handler, so we need more than one bit here. - * - * PREEMPT_MASK: 0x000000ff - * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x000f0000 - * NMI_MASK: 0x00100000 - * PREEMPT_ACTIVE: 0x00200000 - */ -#define PREEMPT_BITS 8 -#define SOFTIRQ_BITS 8 -#define HARDIRQ_BITS 4 -#define NMI_BITS 1 - -#define PREEMPT_SHIFT 0 -#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) -#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) -#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) - -#define __IRQ_MASK(x) ((1UL << (x))-1) - -#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) -#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) -#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) -#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) - -#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) -#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) -#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) -#define NMI_OFFSET (1UL << NMI_SHIFT) - -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) - -#define PREEMPT_ACTIVE_BITS 1 -#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) -#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) - -#define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) -#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ - | NMI_MASK)) - -/* - * Are we doing bottom half or hardware interrupt processing? - * Are we in a softirq context? Interrupt context? - * in_softirq - Are we currently processing softirq or have bh disabled? - * in_serving_softirq - Are we currently processing softirq? - */ -#define in_irq() (hardirq_count()) -#define in_softirq() (softirq_count()) -#define in_interrupt() (irq_count()) -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) - -/* - * Are we in NMI context? - */ -#define in_nmi() (preempt_count() & NMI_MASK) - -#if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 -#else -# define PREEMPT_CHECK_OFFSET 0 -#endif - -/* - * The preempt_count offset needed for things like: - * - * spin_lock_bh() - * - * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and - * softirqs, such that unlock sequences of: - * - * spin_unlock(); - * local_bh_enable(); - * - * Work as expected. - */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) - -/* - * Are we running in atomic context? WARNING: this macro cannot - * always detect atomic context; in particular, it cannot know about - * held spinlocks in non-preemptible kernels. Thus it should not be - * used in the general case to determine whether sleeping is possible. - * Do not use in_atomic() in driver code. - */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) - -/* - * Check whether we were atomic before we did preempt_disable(): - * (used by the scheduler, *after* releasing the kernel lock) - */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) - -#ifdef CONFIG_PREEMPT_COUNT -# define preemptible() (preempt_count() == 0 && !irqs_disabled()) -#else -# define preemptible() 0 -#endif - -#endif /* LINUX_PREEMPT_MASK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f8defa155cf..c53a1784d7a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -25,7 +25,7 @@ struct sched_param { #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 2e10e71ce88e3eaccfd09a045ae6ecebe657ba09 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:47 +0200 Subject: sched/preempt: Rearrange a few symbols after headers merge Adjust a few comments, and further integrate a few definitions after the dumb headers copy. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 8cc0338a5e9a..37974cd4f092 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -9,14 +9,6 @@ #include #include -/* - * We use the MSB mostly because its available; see for - * the other bits -- can't include that header due to inclusion hell. - */ -#define PREEMPT_NEED_RESCHED 0x80000000 - -#include - /* * We put the hardirq and softirq counter into the preemption * counter. The bitmask has the following meaning: @@ -30,11 +22,12 @@ * there are a few palaeontologic drivers which reenable interrupts in * the handler, so we need more than one bit here. * - * PREEMPT_MASK: 0x000000ff - * SOFTIRQ_MASK: 0x0000ff00 - * HARDIRQ_MASK: 0x000f0000 - * NMI_MASK: 0x00100000 - * PREEMPT_ACTIVE: 0x00200000 + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 + * HARDIRQ_MASK: 0x000f0000 + * NMI_MASK: 0x00100000 + * PREEMPT_ACTIVE: 0x00200000 + * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 @@ -64,6 +57,12 @@ #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) +/* We use the MSB mostly because its available */ +#define PREEMPT_NEED_RESCHED 0x80000000 + +/* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ +#include + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ @@ -122,12 +121,6 @@ #define in_atomic_preempt_off() \ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) -#ifdef CONFIG_PREEMPT_COUNT -# define preemptible() (preempt_count() == 0 && !irqs_disabled()) -#else -# define preemptible() 0 -#endif - #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); @@ -160,6 +153,8 @@ do { \ #define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#define preemptible() (preempt_count() == 0 && !irqs_disabled()) + #ifdef CONFIG_PREEMPT #define preempt_enable() \ do { \ @@ -232,6 +227,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() +#define preemptible() 0 #endif /* CONFIG_PREEMPT_COUNT */ -- cgit v1.2.3 From 90b62b5129d5cb50f62f40e684de7a1961e57197 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:48 +0200 Subject: sched/preempt: Rename PREEMPT_CHECK_OFFSET to PREEMPT_DISABLE_OFFSET "CHECK" suggests it's only used as a comparison mask. But now it's used further as a config-conditional preempt disabler offset. Lets disambiguate this name. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 37974cd4f092..4689ef210a13 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -85,9 +85,9 @@ #define in_nmi() (preempt_count() & NMI_MASK) #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_CHECK_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET 1 #else -# define PREEMPT_CHECK_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif /* @@ -103,7 +103,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot @@ -119,7 +119,7 @@ * (used by the scheduler, *after* releasing the kernel lock) */ #define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); -- cgit v1.2.3 From b30f0e3ffedfa52b1d67a302ae5860c49998e5e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:49 +0200 Subject: sched/preempt: Optimize preemption operations on __schedule() callers __schedule() disables preemption and some of its callers (the preempt_schedule*() family) also set PREEMPT_ACTIVE. So we have two preempt_count() modifications that could be performed at once. Lets remove the preemption disablement from __schedule() and pull this responsibility to its callers in order to optimize preempt_count() operations in a single place. Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 4689ef210a13..45da394f2779 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -137,6 +137,18 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) +#define preempt_active_enter() \ +do { \ + preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ + barrier(); \ +} while (0) + +#define preempt_active_exit() \ +do { \ + barrier(); \ + preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ +} while (0) + #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -- cgit v1.2.3 From e017cf21ae82e0b36f026b22083a8ae67926f465 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:50 +0200 Subject: sched/preempt: Fix out of date comment Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 45da394f2779..4057696c641c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -116,7 +116,7 @@ /* * Check whether we were atomic before we did preempt_disable(): - * (used by the scheduler, *after* releasing the kernel lock) + * (used by the scheduler) */ #define in_atomic_preempt_off() \ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) -- cgit v1.2.3 From 3e51f3c4004c9b01f66da03214a3e206f5ed627b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 May 2015 16:41:51 +0200 Subject: sched/preempt: Remove PREEMPT_ACTIVE unmasking off in_atomic() Now that PREEMPT_ACTIVE implies PREEMPT_DISABLE_OFFSET, ignoring PREEMPT_ACTIVE from in_atomic() check isn't useful anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1431441711-29753-7-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 4057696c641c..a1a00e14c14f 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -112,7 +112,7 @@ * used in the general case to determine whether sleeping is possible. * Do not use in_atomic() in driver code. */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) +#define in_atomic() (preempt_count() != 0) /* * Check whether we were atomic before we did preempt_disable(): -- cgit v1.2.3 From 8bcbde5480f9777f8b74d71493722c663e22c21b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:06 +0200 Subject: sched/preempt, mm/fault: Count pagefault_disable() levels in pagefault_disabled Until now, pagefault_disable()/pagefault_enabled() used the preempt count to track whether in an environment with pagefaults disabled (can be queried via in_atomic()). This patch introduces a separate counter in task_struct to count the level of pagefault_disable() calls. We'll keep manipulating the preempt count to retain compatibility to existing pagefault handlers. It is now possible to verify whether in a pagefault_disable() envionment by calling pagefault_disabled(). In contrast to in_atomic() it will not be influenced by preempt_enable()/preempt_disable(). This patch is based on a patch from Ingo Molnar. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-2-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/uaccess.h | 36 +++++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c53a1784d7a9..dd07ac03f82a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1788,6 +1788,7 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif + int pagefault_disabled; }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ecd3319dac33..23290cc93a24 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,20 +2,36 @@ #define __LINUX_UACCESS_H__ #include +#include #include +static __always_inline void pagefault_disabled_inc(void) +{ + current->pagefault_disabled++; +} + +static __always_inline void pagefault_disabled_dec(void) +{ + current->pagefault_disabled--; + WARN_ON(current->pagefault_disabled < 0); +} + /* - * These routines enable/disable the pagefault handler in that - * it will not take any locks and go straight to the fixup table. + * These routines enable/disable the pagefault handler. If disabled, it will + * not take any locks and go straight to the fixup table. + * + * We increase the preempt and the pagefault count, to be able to distinguish + * whether we run in simple atomic context or in a real pagefault_disable() + * context. + * + * For now, after pagefault_disabled() has been called, we run in atomic + * context. User access methods will not sleep. * - * They have great resemblance to the preempt_disable/enable calls - * and in fact they are identical; this is because currently there is - * no other way to make the pagefault handlers do this. So we do - * disable preemption but we don't necessarily care about that. */ static inline void pagefault_disable(void) { preempt_count_inc(); + pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault * can hit. @@ -25,18 +41,24 @@ static inline void pagefault_disable(void) static inline void pagefault_enable(void) { -#ifndef CONFIG_PREEMPT /* * make sure to issue those last loads/stores before enabling * the pagefault handler again. */ barrier(); + pagefault_disabled_dec(); +#ifndef CONFIG_PREEMPT preempt_count_dec(); #else preempt_enable(); #endif } +/* + * Is the pagefault handler disabled? If so, user access methods will not sleep. + */ +#define pagefault_disabled() (current->pagefault_disabled != 0) + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, -- cgit v1.2.3 From 9ec23531fd48031d1b6ca5366f5f967d17a8bc28 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:07 +0200 Subject: sched/preempt, mm/fault: Trigger might_sleep() in might_fault() with disabled pagefaults Commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with pagefault_disable()") removed might_sleep() checks for all user access code (that uses might_fault()). The reason was to disable wrong "sleep in atomic" warnings in the following scenario: pagefault_disable() rc = copy_to_user(...) pagefault_enable() Which is valid, as pagefault_disable() increments the preempt counter and therefore disables the pagefault handler. copy_to_user() will not sleep and return an error code if a page is not available. However, as all might_sleep() checks are removed, CONFIG_DEBUG_ATOMIC_SLEEP would no longer detect the following scenario: spin_lock(&lock); rc = copy_to_user(...) spin_unlock(&lock) If the kernel is compiled with preemption turned on, preempt_disable() will make in_atomic() detect disabled preemption. The fault handler would correctly never sleep on user access. However, with preemption turned off, preempt_disable() is usually a NOP (with !CONFIG_PREEMPT_COUNT), therefore in_atomic() will not be able to detect disabled preemption nor disabled pagefaults. The fault handler could sleep. We really want to enable CONFIG_DEBUG_ATOMIC_SLEEP checks for user access functions again, otherwise we can end up with horrible deadlocks. Root of all evil is that pagefault_disable() acts almost as preempt_disable(), depending on preemption being turned on/off. As we now have pagefault_disabled(), we can use it to distinguish whether user acces functions might sleep. Convert might_fault() into a makro that calls __might_fault(), to allow proper file + line messages in case of a might_sleep() warning. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-3-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3a5b48e52a9e..060dd7b61c6d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) -void might_fault(void); +#define might_fault() __might_fault(__FILE__, __LINE__) +void __might_fault(const char *file, int line); #else static inline void might_fault(void) { } #endif -- cgit v1.2.3 From 2cb7c9cb426660b5ed58b643d9e7dd5d50ba901f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:09 +0200 Subject: sched/preempt, mm/kmap: Explicitly disable/enable preemption in kmap_atomic_* The existing code relies on pagefault_disable() implicitly disabling preemption, so that no schedule will happen between kmap_atomic() and kunmap_atomic(). Let's make this explicit, to prepare for pagefault_disable() not touching preemption anymore. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-5-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/highmem.h | 2 ++ include/linux/io-mapping.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 9286a46b7d69..6aefcd0031a6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -65,6 +65,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page) static inline void __kunmap_atomic(void *addr) { pagefault_enable(); + preempt_enable(); } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 657fab4efab3..c27dde7215b5 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -141,6 +141,7 @@ static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { + preempt_disable(); pagefault_disable(); return ((char __force __iomem *) mapping) + offset; } @@ -149,6 +150,7 @@ static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { pagefault_enable(); + preempt_enable(); } /* Non-atomic map/unmap */ -- cgit v1.2.3 From 70ffdb9393a7264a069265edded729078dcf0425 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:11 +0200 Subject: mm/fault, arch: Use pagefault_disable() to check for disabled pagefaults in the handler Introduce faulthandler_disabled() and use it to check for irq context and disabled pagefaults (via pagefault_disable()) in the pagefault handlers. Please note that we keep the in_atomic() checks in place - to detect whether in irq context (in which case preemption is always properly disabled). In contrast, preempt_disable() should never be used to disable pagefaults. With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt counter, and therefore the result of in_atomic() differs. We validate that condition by using might_fault() checks when calling might_sleep(). Therefore, add a comment to faulthandler_disabled(), describing why this is needed. faulthandler_disabled() and pagefault_disable() are defined in linux/uaccess.h, so let's properly add that include to all relevant files. This patch is based on a patch from Thomas Gleixner. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 23290cc93a24..90786d2d74e5 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -59,6 +59,18 @@ static inline void pagefault_enable(void) */ #define pagefault_disabled() (current->pagefault_disabled != 0) +/* + * The pagefault handler is in general disabled by pagefault_disable() or + * when in irq context (via in_atomic()). + * + * This function should only be used by the fault handlers. Other users should + * stick to pagefault_disabled(). + * Please NEVER use preempt_disable() to disable the fault handler. With + * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. + * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. + */ +#define faulthandler_disabled() (pagefault_disabled() || in_atomic()) + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, -- cgit v1.2.3 From 8222dbe21e79338de92d5e1956cd1e3994cc9f93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:20 +0200 Subject: sched/preempt, mm/fault: Decouple preemption from the page fault logic As the fault handlers now all rely on the pagefault_disabled() checks and implicit preempt_disable() calls by pagefault_disable() have been made explicit, we can completely rely on the pagefault_disableD counter. So let's no longer touch the preempt count when disabling/enabling pagefaults. After a call to pagefault_disable(), pagefault_disabled() will return true, but in_atomic() won't. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-16-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 90786d2d74e5..ae572c138607 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -1,7 +1,6 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ -#include #include #include @@ -20,17 +19,11 @@ static __always_inline void pagefault_disabled_dec(void) * These routines enable/disable the pagefault handler. If disabled, it will * not take any locks and go straight to the fixup table. * - * We increase the preempt and the pagefault count, to be able to distinguish - * whether we run in simple atomic context or in a real pagefault_disable() - * context. - * - * For now, after pagefault_disabled() has been called, we run in atomic - * context. User access methods will not sleep. - * + * User access methods will not sleep when called from a pagefault_disabled() + * environment. */ static inline void pagefault_disable(void) { - preempt_count_inc(); pagefault_disabled_inc(); /* * make sure to have issued the store before a pagefault @@ -47,11 +40,6 @@ static inline void pagefault_enable(void) */ barrier(); pagefault_disabled_dec(); -#ifndef CONFIG_PREEMPT - preempt_count_dec(); -#else - preempt_enable(); -#endif } /* -- cgit v1.2.3 From 80ed87c8a9ca0cad7ca66cf3bbdfb17559a66dcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 May 2015 14:23:45 +0200 Subject: sched/wait: Introduce TASK_NOLOAD and TASK_IDLE Currently people use TASK_INTERRUPTIBLE to idle kthreads and wait for 'work' because TASK_UNINTERRUPTIBLE contributes to the loadavg. Having all idle kthreads contribute to the loadavg is somewhat silly. Now mostly this works OK, because kthreads have all their signals masked. However there's a few sites where this is causing problems and TASK_UNINTERRUPTIBLE should be used, except for that loadavg issue. This patch adds TASK_NOLOAD which, when combined with TASK_UNINTERRUPTIBLE avoids the loadavg accounting. As most of imagined usage sites are loops where a thread wants to idle, waiting for work, a helper TASK_IDLE is introduced. Signed-off-by: Peter Zijlstra (Intel) Cc: Julian Anastasov Cc: Linus Torvalds Cc: NeilBrown Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dd07ac03f82a..7de815c6fa78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -218,9 +218,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define TASK_WAKEKILL 128 #define TASK_WAKING 256 #define TASK_PARKED 512 -#define TASK_STATE_MAX 1024 +#define TASK_NOLOAD 1024 +#define TASK_STATE_MAX 2048 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; @@ -230,6 +231,8 @@ extern char ___assert_task_state[1 - 2*!!( #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + /* Convenience macros for the sake of wake_up */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) @@ -245,7 +248,8 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0) + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -- cgit v1.2.3 From f03123783d4e43cd59df58e23e963136e04f8280 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Thu, 30 Apr 2015 20:44:45 +0530 Subject: extcon: axp288: Add axp288 extcon driver support This patch adds the extcon support for AXP288 PMIC which has the BC1.2 charger detection capability. Additionally it also adds the USB mux switching support b/w SOC and PMIC based on GPIO control. Signed-off-by: Ramakrishna Pallala Acked-by: Lee Jones [cw00.choi: Modify the log message to keep the consistent log message pattern] Signed-off-by: Chanwoo Choi --- include/linux/mfd/axp20x.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index dfabd6db7ddf..4ed8071d062e 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -275,4 +275,9 @@ struct axp20x_fg_pdata { int thermistor_curve[MAX_THERM_CURVE_SIZE][2]; }; +struct axp288_extcon_pdata { + /* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */ + struct gpio_desc *gpio_mux_cntl; +}; + #endif /* __LINUX_MFD_AXP20X_H */ -- cgit v1.2.3 From 707d7550875a9bd245ce5f1077d2d2cb44eab218 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 15 Apr 2015 13:57:51 +0900 Subject: extcon: Add extcon_get_edev_name() API to get the extcon device name This patch adds the extcon_get_edev_name() API to get the name of extcon device because all information inclued in the structure extcon_dev should be accessed by extcon core API instead of directly accessing the data. Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 36f49c405dfb..e2cf6254c86a 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -259,6 +259,10 @@ extern int extcon_unregister_notifier(struct extcon_dev *edev, * This function use phandle of devicetree to get extcon device directly. */ extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index); + +/* Following API to get information of extcon device */ +extern const char *extcon_get_edev_name(struct extcon_dev *edev); + #else /* CONFIG_EXTCON */ static inline int extcon_dev_register(struct extcon_dev *edev) { -- cgit v1.2.3 From b9ec23c08a0274d31ee626f14b359563ea0cae46 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 24 Apr 2015 14:48:52 +0900 Subject: extcon: Fix the checkpatch warning and minor coding style issue This patch clean up the extcon core driver by fixing the checkpatch warning and minor coding style issue. Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index e2cf6254c86a..799474d9dc48 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -97,8 +97,8 @@ struct extcon_cable; * @state: Attach/detach state of this extcon. Do not provide at * register-time. * @nh: Notifier for the state change events from this extcon - * @entry: To support list of extcon devices so that users can search - * for extcon devices based on the extcon name. + * @entry: To support list of extcon devices so that users can + * search for extcon devices based on the extcon name. * @lock: * @max_supported: Internal value to store the number of cables. * @extcon_dev_type: Device_type struct to provide attribute_groups @@ -258,7 +258,8 @@ extern int extcon_unregister_notifier(struct extcon_dev *edev, * Following API get the extcon device from devicetree. * This function use phandle of devicetree to get extcon device directly. */ -extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index); +extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, + int index); /* Following API to get information of extcon device */ extern const char *extcon_get_edev_name(struct extcon_dev *edev); -- cgit v1.2.3 From 9e86b2ad4c11fd52ee8133abce7a29e0b32d29a7 Mon Sep 17 00:00:00 2001 From: Inha Song Date: Mon, 4 May 2015 13:42:13 +0900 Subject: extcon: arizona: Add support for select accessory detect mode when headphone detection This patch add support for select accessory detect mode to HPDETL or HPDETR. Arizona provides a headphone detection circuit on the HPDETL and HPDETR pins to measure the impedance of an external load connected to the headphone. Depending on board design, headphone detect pins can change to HPDETR or HPDETL. Signed-off-by: Inha Song Acked-by: Lee Jones Acked-by: Charles Keepax Signed-off-by: Chanwoo Choi --- include/linux/mfd/arizona/pdata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 1789cb0f4f17..aa5c48b06755 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -121,6 +121,9 @@ struct arizona_pdata { /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; + /** Channel to use for headphone detection */ + unsigned int hpdet_channel; + /** Extra debounce timeout used during initial mic detection (ms) */ int micd_detect_debounce; -- cgit v1.2.3 From ca42aaf0c8616cde6161ea4391dff364efeee46a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 18 May 2015 14:19:13 +0200 Subject: time: Refactor msecs_to_jiffies Refactor the msecs_to_jiffies conditional code part in time.c and jiffies.h putting it into conditional functions rather than #ifdefs to improve readability. [ tglx: Verified that there is no binary code change ] Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1431951554-5563-2-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c367cbdf73ab..9527ddbb0f1b 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -7,6 +7,7 @@ #include #include #include /* for HZ */ +#include /* * The following defines establish the engineering parameters of the PLL @@ -288,7 +289,68 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; } -extern unsigned long msecs_to_jiffies(const unsigned int m); +extern unsigned long __msecs_to_jiffies(const unsigned int m); +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/* + * HZ is equal to or smaller than 1000, and 1000 is a nice round + * multiple of HZ, divide with the factor between them, but round + * upwards: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +} +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +/* + * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - + * simply multiply with the factor between them. + * + * But first make sure the multiplication result cannot overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); +} +#else +/* + * Generic case - multiply, round and divide. But first check that if + * we are doing a net multiplication, that we wouldn't overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) + >> MSEC_TO_HZ_SHR32; +} +#endif +/** + * msecs_to_jiffies: - convert milliseconds to jiffies + * @m: time in milliseconds + * + * conversion is done as follows: + * + * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows. + * for the details see __msecs_to_jiffies() + * + * the HZ range specific helpers _msecs_to_jiffies() are called from + * __msecs_to_jiffies(). + */ +static inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + return __msecs_to_jiffies(m); +} + extern unsigned long usecs_to_jiffies(const unsigned int u); extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, -- cgit v1.2.3 From daa67b4b70568a07fef3cffacb2055891bf42ddb Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 18 May 2015 14:19:14 +0200 Subject: time: Allow gcc to fold constants when possible To allow constant folding in msecs_to_jiffies() conditionally calls the HZ dependent _msecs_to_jiffies() helpers or, when gcc can not figure out constant folding, __msecs_to_jiffies which is the renamed original msecs_to_jiffies() function. Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1431951554-5563-3-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 9527ddbb0f1b..5e75af6cf1bc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -343,12 +343,24 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * - * the HZ range specific helpers _msecs_to_jiffies() are called from - * __msecs_to_jiffies(). + * msecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __msecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _msecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. */ static inline unsigned long msecs_to_jiffies(const unsigned int m) { - return __msecs_to_jiffies(m); + if (__builtin_constant_p(m)) { + if ((int)m < 0) + return MAX_JIFFY_OFFSET; + return _msecs_to_jiffies(m); + } else { + return __msecs_to_jiffies(m); + } } extern unsigned long usecs_to_jiffies(const unsigned int u); -- cgit v1.2.3 From 0a4377de305684c883bf90ad21e3cbdeead70f5c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 19 May 2015 17:07:14 +0800 Subject: genirq: Introduce irq_set_vcpu_affinity() to target an interrupt to a VCPU With Posted-Interrupts support in Intel CPU and IOMMU, an external interrupt from assigned-devices could be directly delivered to a virtual CPU in a virtual machine. Instead of hacking KVM and Intel IOMMU drivers, we propose a platform independent interface to target an interrupt to a specific virtual CPU in a virtual machine, or set virtual CPU affinity for an interrupt. By adopting this new interface and the hierarchy irqdomain, we could easily support posted-interrupts on Intel platforms, and also provide flexible enough interfaces for other platforms to support similar features. Here is the usage scenario for this interface: Guest update MSI/MSI-X interrupt configuration -->QEMU and KVM handle this -->KVM call this interface (passing posted interrupts descriptor and guest vector) -->irq core will transfer the control to IOMMU -->IOMMU will do the real work of updating IRTE (IRTE has new format for VT-d Posted-Interrupts) Signed-off-by: Jiang Liu Signed-off-by: Feng Wu Link: http://lkml.kernel.org/r/1432026437-16560-2-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 62c6901cab55..48cb7d1aa58f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -327,6 +327,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_write_msi_msg: optional to write message content for MSI * @irq_get_irqchip_state: return the internal state of an interrupt * @irq_set_irqchip_state: set the internal state of a interrupt + * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine * @flags: chip specific flags */ struct irq_chip { @@ -369,6 +370,8 @@ struct irq_chip { int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); + int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); + unsigned long flags; }; @@ -422,6 +425,7 @@ extern void irq_cpu_online(void); extern void irq_cpu_offline(void); extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); +extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); @@ -467,6 +471,8 @@ extern int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force); extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); +extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, + void *vcpu_info); #endif /* Handling of unhandled and spurious interrupts: */ -- cgit v1.2.3 From 8fff52fd50934580c5108afed12043a774edf728 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Apr 2015 09:04:04 +0530 Subject: clockevents: Introduce CLOCK_EVT_STATE_ONESHOT_STOPPED state When no timers/hrtimers are pending, the expiry time is set to a special value: 'KTIME_MAX'. This normally happens with NO_HZ_{IDLE|FULL} in both LOWRES/HIGHRES modes. When 'expiry == KTIME_MAX', we either cancel the 'tick-sched' hrtimer (NOHZ_MODE_HIGHRES) or skip reprogramming clockevent device (NOHZ_MODE_LOWRES). But, the clockevent device is already reprogrammed from the tick-handler for next tick. As the clock event device is programmed in ONESHOT mode it will at least fire one more time (unnecessarily). Timers on few implementations (like arm_arch_timer, etc.) only support PERIODIC mode and their drivers emulate ONESHOT over that. Which means that on these platforms we will get spurious interrupts periodically (at last programmed interval rate, normally tick rate). In order to avoid spurious interrupts, the clockevent device should be stopped or its interrupts should be masked. A simple (yet hacky) solution to get this fixed could be: update hrtimer_force_reprogram() to always reprogram clockevent device and update clockevent drivers to STOP generating events (or delay it to max time) when 'expires' is set to KTIME_MAX. But the drawback here is that every clockevent driver has to be hacked for this particular case and its very easy for new ones to miss this. However, Thomas suggested to add an optional state ONESHOT_STOPPED to solve this problem: lkml.org/lkml/2014/5/9/508. This patch adds support for ONESHOT_STOPPED state in clockevents core. It will only be available to drivers that implement the state-specific callbacks instead of the legacy ->set_mode() callback. Signed-off-by: Viresh Kumar Reviewed-by: Preeti U. Murthy Cc: linaro-kernel@lists.linaro.org Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Daniel Lezcano Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b8b383a03ac07b13312c16850b5106b82e4245b5.1428031396.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 96c280b2c263..271fa4c8eb29 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -37,12 +37,15 @@ enum clock_event_mode { * reached from DETACHED or SHUTDOWN. * ONESHOT: Device is programmed to generate event only once. Can be reached * from DETACHED or SHUTDOWN. + * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily + * stopped. */ enum clock_event_state { CLOCK_EVT_STATE_DETACHED, CLOCK_EVT_STATE_SHUTDOWN, CLOCK_EVT_STATE_PERIODIC, CLOCK_EVT_STATE_ONESHOT, + CLOCK_EVT_STATE_ONESHOT_STOPPED, }; /* @@ -90,6 +93,7 @@ enum clock_event_state { * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. * @set_state_periodic: switch state to periodic, if !set_mode * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode * @set_state_shutdown: switch state to shutdown, if !set_mode * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events @@ -121,11 +125,12 @@ struct clock_event_device { * State transition callback(s): Only one of the two groups should be * defined: * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). + * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). */ void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_oneshot_stopped)(struct clock_event_device *); int (*set_state_shutdown)(struct clock_event_device *); int (*tick_resume)(struct clock_event_device *); -- cgit v1.2.3 From 4ecd4fef3a074c8bb43c391a57742c422469ebbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2015 09:38:13 +0200 Subject: block: use an atomic_t for mq_freeze_depth lockdep gets unhappy about the not disabling irqs when using the queue_lock around it. Instead of trying to fix that up just switch to an atomic_t and get rid of the lock. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2da818a48097..bc917956a6d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -444,7 +444,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; - int mq_freeze_depth; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.2.3 From b25de9d6da49b1a8760a89672283128aa8c78345 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:01 +0200 Subject: block: remove BIO_EOPNOTSUPP Since the big barrier rewrite/removal in 2007 we never fail FLUSH or FUA requests, which means we can remove the magic BIO_EOPNOTSUPP flag to help propagating those to the buffer_head layer. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 93d2e7153816..daf95915d104 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -118,7 +118,6 @@ struct bio { #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -- cgit v1.2.3 From 97ca223c3b37ed12a5b67a5dc6247e5a4799d337 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:02 +0200 Subject: block: remove unused BIO_RW_BLOCK and BIO_EOF flags Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index daf95915d104..09c7a2cd48ef 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,8 +112,6 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ #define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ -- cgit v1.2.3 From 4e3d9cb0134fea035e6eb1707e5e7d8aaffa186d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 17:14:51 +0200 Subject: jiffies: Remove the extra indentation level Somehow I missed to clean that up when applying the patches. Fix it up now. Reported-by: Joe Perches Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire --- include/linux/jiffies.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 5e75af6cf1bc..3bde5eb8568b 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -298,7 +298,7 @@ extern unsigned long __msecs_to_jiffies(const unsigned int m); */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); } #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) /* @@ -309,9 +309,9 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; - return m * (HZ / MSEC_PER_SEC); + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); } #else /* @@ -320,11 +320,10 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) */ static inline unsigned long _msecs_to_jiffies(const unsigned int m) { - if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) - return MAX_JIFFY_OFFSET; + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; - return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) - >> MSEC_TO_HZ_SHR32; + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; } #endif /** -- cgit v1.2.3 From b2dbe0a60f1bcf4db5c701f1577b3135c3159eb5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 May 2015 09:18:28 -0600 Subject: block: collapse bio bit space Various previous patches removed bits and left holes, collapse them all. Leave the reset start bit where it is, we don't need to change that. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09c7a2cd48ef..3f4ded0b1a34 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,15 +112,15 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_QUIET 9 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ -#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3 From 343df3c79c62b644ce6ff5dff96c9e0be1ecb242 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2015 09:23:23 +0200 Subject: suspend: simplify block I/O handling Stop abusing struct page functionality and the swap end_io handler, and instead add a modified version of the blk-lib.c bio_batch helpers. Also move the block I/O code into swap.c as they are directly tied into each other. Signed-off-by: Christoph Hellwig Tested-by: Pavel Machek Tested-by: Ming Lin Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Signed-off-by: Jens Axboe --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index cee108cbe2d5..38874729dc5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); -extern void end_swap_bio_read(struct bio *bio, int err); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); -- cgit v1.2.3 From 3520469d65f26a1cd2f610f5d5de976f78db74fe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Apr 2015 11:20:48 +0200 Subject: KVM: export __gfn_to_pfn_memslot, drop gfn_to_pfn_async gfn_to_pfn_async is used in just one place, and because of x86-specific treatment that place will need to look at the memory slot. Hence inline it into try_async_pf and export __gfn_to_pfn_memslot. The patch also switches the subsequent call to gfn_to_pfn_prot to use __gfn_to_pfn_memslot. This is a small optimization. Finally, remove the now-unused async argument of __gfn_to_pfn. Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b7a08cd6f4a8..87fd74a04005 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -539,13 +539,13 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); -pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, - bool write_fault, bool *writable); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); +pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, + bool *async, bool write_fault, bool *writable); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); -- cgit v1.2.3 From cad706df7e4a00a595f2662f32c0fc174aa4e61f Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Tue, 19 May 2015 12:01:18 +0200 Subject: livepatch: make kobject in klp_object statically allocated Make kobj variable (of type struct kobject) statically allocated in klp_object structure. It will allow us to move in the func-object-patch hierarchy through kobject links. The only reason to have it dynamic was to not have empty release callback in the code. However we have empty callbacks for function and patch in the code now, so it is no longer valid and the advantage of static allocation is clear. Signed-off-by: Miroslav Benes Signed-off-by: Jiri Slaby Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index ee6dbb39a809..fe45f2f02c8d 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -99,7 +99,7 @@ struct klp_object { struct klp_func *funcs; /* internal */ - struct kobject *kobj; + struct kobject kobj; struct module *mod; enum klp_state state; }; -- cgit v1.2.3 From 8cdd043ab32c2ff28d2a77c514a768a9edce244c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 19 May 2015 12:01:19 +0200 Subject: livepatch: introduce patch/func-walking helpers klp_for_each_object and klp_for_each_func are now used all over the code. One need not think what is the proper condition to check in the for loop now. Signed-off-by: Jiri Slaby Acked-by: Josh Poimboeuf Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index fe45f2f02c8d..31db7a05dd36 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -123,6 +123,12 @@ struct klp_patch { enum klp_state state; }; +#define klp_for_each_object(patch, obj) \ + for (obj = patch->objs; obj->funcs; obj++) + +#define klp_for_each_func(obj, func) \ + for (func = obj->funcs; func->old_name; func++) + int klp_register_patch(struct klp_patch *); int klp_unregister_patch(struct klp_patch *); int klp_enable_patch(struct klp_patch *); -- cgit v1.2.3 From 4990d4fe327b9d9a7a3be7103a82699406fdde69 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 18 May 2015 15:40:29 -0700 Subject: PM / Wakeirq: Add automated device wake IRQ handling Turns out we can automate the handling for the device_may_wakeup() quite a bit by using the kernel wakeup source list as suggested by Rafael J. Wysocki . And as some hardware has separate dedicated wake-up interrupt in addition to the IO interrupt, we can automate the handling by adding a generic threaded interrupt handler that just calls the device PM runtime to wake up the device. This allows dropping code from device drivers as we currently are doing it in multiple ways, and often wrong. For most drivers, we should be able to drop the following boilerplate code from runtime_suspend and runtime_resume functions: ... device_init_wakeup(dev, true); ... if (device_may_wakeup(dev)) enable_irq_wake(irq); ... if (device_may_wakeup(dev)) disable_irq_wake(irq); ... device_init_wakeup(dev, false); ... We can replace it with just the following init and exit time code: ... device_init_wakeup(dev, true); dev_pm_set_wake_irq(dev, irq); ... dev_pm_clear_wake_irq(dev); device_init_wakeup(dev, false); ... And for hardware with dedicated wake-up interrupts: ... device_init_wakeup(dev, true); dev_pm_set_dedicated_wake_irq(dev, irq); ... dev_pm_clear_wake_irq(dev); device_init_wakeup(dev, false); ... Signed-off-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 ++ include/linux/pm_wakeirq.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_wakeup.h | 9 ++++++++ 3 files changed, 63 insertions(+) create mode 100644 include/linux/pm_wakeirq.h (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 2d29c64f8fb1..1c4ed0cb7907 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -529,6 +529,7 @@ enum rpm_request { }; struct wakeup_source; +struct wake_irq; struct pm_domain_data; struct pm_subsys_data { @@ -568,6 +569,7 @@ struct dev_pm_info { unsigned long timer_expires; struct work_struct work; wait_queue_head_t wait_queue; + struct wake_irq *wakeirq; atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h new file mode 100644 index 000000000000..4046fa1b7d25 --- /dev/null +++ b/include/linux/pm_wakeirq.h @@ -0,0 +1,52 @@ +/* + * pm_wakeirq.h - Device wakeirq helper functions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_PM_WAKEIRQ_H +#define _LINUX_PM_WAKEIRQ_H + +#ifdef CONFIG_PM + +extern int dev_pm_set_wake_irq(struct device *dev, int irq); +extern int dev_pm_set_dedicated_wake_irq(struct device *dev, + int irq); +extern void dev_pm_clear_wake_irq(struct device *dev); +extern void dev_pm_enable_wake_irq(struct device *dev); +extern void dev_pm_disable_wake_irq(struct device *dev); + +#else /* !CONFIG_PM */ + +static inline int dev_pm_set_wake_irq(struct device *dev, int irq) +{ + return 0; +} + +static inline int dev_pm_set_dedicated__wake_irq(struct device *dev, + int irq) +{ + return 0; +} + +static inline void dev_pm_clear_wake_irq(struct device *dev) +{ +} + +static inline void dev_pm_enable_wake_irq(struct device *dev) +{ +} + +static inline void dev_pm_disable_wake_irq(struct device *dev) +{ +} + +#endif /* CONFIG_PM */ +#endif /* _LINUX_PM_WAKEIRQ_H */ diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index a0f70808d7f4..a3447932df1f 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -28,9 +28,17 @@ #include +struct wake_irq; + /** * struct wakeup_source - Representation of wakeup sources * + * @name: Name of the wakeup source + * @entry: Wakeup source list entry + * @lock: Wakeup source lock + * @wakeirq: Optional device specific wakeirq + * @timer: Wakeup timer list + * @timer_expires: Wakeup timer expiration * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. * @last_time: Monotonic clock when the wakeup source's was touched last time. @@ -47,6 +55,7 @@ struct wakeup_source { const char *name; struct list_head entry; spinlock_t lock; + struct wake_irq *wakeirq; struct timer_list timer; unsigned long timer_expires; ktime_t total_time; -- cgit v1.2.3 From ecc8617053e0a97272ef2eee138809f30080e84b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:03 -0700 Subject: module: add extra argument for parse_params() callback This adds an extra argument onto parse_params() to be used as a way to make the unused callback a bit more useful and generic by allowing the caller to pass on a data structure of its choice. An example use case is to allow us to easily make module parameters for every module which we will do next. @ parse @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ extern char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )); @ parse_mod @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )) { ... } @ parse_args_found @ expression R, E1, E2, E3, E4, E5, E6; identifier func; @@ ( R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); ) @ parse_args_unused depends on parse_args_found @ identifier parse_args_found.func; @@ int func(char *param, char *val, const char *unused + , void *arg ) { ... } @ mod_unused depends on parse_args_found @ identifier parse_args_found.func; expression A1, A2, A3; @@ - func(A1, A2, A3); + func(A1, A2, A3, NULL); Generated-by: Coccinelle SmPL Cc: cocci@systeme.lip6.fr Cc: Tejun Heo Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Rusty Russell Cc: Christoph Hellwig Cc: Felipe Contreras Cc: Ewan Milne Cc: Jean Delvare Cc: Hannes Reinecke Cc: Jani Nikula Cc: linux-kernel@vger.kernel.org Reviewed-by: Tejun Heo Acked-by: Rusty Russell Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- include/linux/moduleparam.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1c9effa25e26..13923709d30d 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -357,8 +357,9 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, - const char *doing)); + const char *doing, void *arg)); /* Called by module remove. */ #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 765230b5f084863183aa8adb3405ab3f32c0b16e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 30 Mar 2015 16:20:04 -0700 Subject: driver-core: add asynchronous probing support for drivers Some devices take a long time when initializing, and not all drivers are suited to initialize their devices when they are open. For example, input drivers need to interrogate their devices in order to publish device's capabilities before userspace will open them. When such drivers are compiled into kernel they may stall entire kernel initialization. This change allows drivers request for their probe functions to be called asynchronously during driver and device registration (manual binding is still synchronous). Because async_schedule is used to perform asynchronous calls module loading will still wait for the probing to complete. Note that the end goal is to make the probing asynchronous by default, so annotating drivers with PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us to speed up boot process while we validating and fixing the rest of the drivers and preparing userspace. This change is based on earlier patch by "Luis R. Rodriguez" Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6558af90c8fe..7857b46c548b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -195,6 +195,31 @@ extern int bus_unregister_notifier(struct bus_type *bus, extern struct kset *bus_get_kset(struct bus_type *bus); extern struct klist *bus_get_device_klist(struct bus_type *bus); +/** + * enum probe_type - device driver probe type to try + * Device drivers may opt in for special handling of their + * respective probe routines. This tells the core what to + * expect and prefer. + * + * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines + * to run synchronously with driver and device registration + * (with the exception of -EPROBE_DEFER handling - re-probing + * always ends up being done asynchronously). + * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which + * probing order is not essential for booting the system may + * opt into executing their probes asynchronously. + * + * Note that the end goal is to switch the kernel to use asynchronous + * probing by default, so annotating drivers with + * %PROBE_PREFER_ASYNCHRONOUS is a temporary measure that allows us + * to speed up boot process while we are validating the rest of the + * drivers. + */ +enum probe_type { + PROBE_SYNCHRONOUS, + PROBE_PREFER_ASYNCHRONOUS, +}; + /** * struct device_driver - The basic device driver structure * @name: Name of the device driver. @@ -202,6 +227,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * @owner: The module owner. * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. + * @probe_type: Type of the probe (synchronous or asynchronous) to use. * @of_match_table: The open firmware table. * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, @@ -235,6 +261,7 @@ struct device_driver { const char *mod_name; /* used for built-in modules */ bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ + enum probe_type probe_type; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; @@ -975,6 +1002,7 @@ extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device *dev); extern int __must_check device_attach(struct device *dev); extern int __must_check driver_attach(struct device_driver *drv); +extern void device_initial_probe(struct device *dev); extern int __must_check device_reprobe(struct device *dev); /* -- cgit v1.2.3 From f2411da746985e60d4d087f3a43e271c61785927 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:05 -0700 Subject: driver-core: add driver module asynchronous probe support Some init systems may wish to express the desire to have device drivers run their probe() code asynchronously. This implements support for this and allows userspace to request async probe as a preference through a generic shared device driver module parameter, async_probe. Implementation for async probe is supported through a module parameter given that since synchronous probe has been prevalent for years some userspace might exist which relies on the fact that the device driver will probe synchronously and the assumption that devices it provides will be immediately available after this. Signed-off-by: Luis R. Rodriguez Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 +++++--- include/linux/module.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 7857b46c548b..77b7cd9e5467 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -201,10 +201,12 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * respective probe routines. This tells the core what to * expect and prefer. * - * @PROBE_SYNCHRONOUS: Default. Drivers expect their probe routines + * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines * to run synchronously with driver and device registration * (with the exception of -EPROBE_DEFER handling - re-probing - * always ends up being done asynchronously). + * always ends up being done asynchronously) unless user + * explicitly requested asynchronous probing via module + * parameter. * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which * probing order is not essential for booting the system may * opt into executing their probes asynchronously. @@ -216,7 +218,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * drivers. */ enum probe_type { - PROBE_SYNCHRONOUS, + PROBE_DEFAULT_STRATEGY, PROBE_PREFER_ASYNCHRONOUS, }; diff --git a/include/linux/module.h b/include/linux/module.h index c883b86ea964..f46a47d3c0dc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -257,6 +257,8 @@ struct module { bool sig_ok; #endif + bool async_probe_requested; + /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; const unsigned long *gpl_future_crcs; -- cgit v1.2.3 From d173a137c5bd95ee29d02705e5fa8890ef149718 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:06 -0700 Subject: driver-core: enable drivers to opt-out of async probe There are drivers that can not be probed asynchronously. One such group is platform drivers registered with platform_driver_probe(), which expects driver's probe routine be discarded after the driver has been registered and initial binding attempt executed. Also platform_driver_probe() an error when no devices were bound to the driver, allowing failing to load such driver module altogether. Other drivers do not work well with asynchronous probing because of driver bug or not optimal driver organization. To allow using such drivers even when user requests asynchronous probing as default boot strategy, let's allow them to opt out. Signed-off-by: Luis R. Rodriguez Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 77b7cd9e5467..00ac57c26615 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -201,15 +201,15 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * respective probe routines. This tells the core what to * expect and prefer. * - * @PROBE_DEFAULT_STRATEGY: Drivers expect their probe routines - * to run synchronously with driver and device registration - * (with the exception of -EPROBE_DEFER handling - re-probing - * always ends up being done asynchronously) unless user - * explicitly requested asynchronous probing via module - * parameter. + * @PROBE_DEFAULT_STRATEGY: Used by drivers that work equally well + * whether probed synchronously or asynchronously. * @PROBE_PREFER_ASYNCHRONOUS: Drivers for "slow" devices which * probing order is not essential for booting the system may * opt into executing their probes asynchronously. + * @PROBE_FORCE_SYNCHRONOUS: Use this to annotate drivers that need + * their probe routines to run synchronously with driver and + * device registration (with the exception of -EPROBE_DEFER + * handling - re-probing always ends up being done asynchronously). * * Note that the end goal is to switch the kernel to use asynchronous * probing by default, so annotating drivers with @@ -220,6 +220,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); enum probe_type { PROBE_DEFAULT_STRATEGY, PROBE_PREFER_ASYNCHRONOUS, + PROBE_FORCE_SYNCHRONOUS, }; /** -- cgit v1.2.3 From ec0ccc16a09fc32f7142ef3ddf1c2276fbbb35d0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 30 Mar 2015 16:20:09 -0700 Subject: module: add core_param_unsafe Similarly to module_param_unsafe(), add the helper to be used by core code wishing to expose unsafe debugging or testing parameters that taint the kernel when set. Acked-by: Rusty Russell Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/moduleparam.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 13923709d30d..6480dcaca275 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -310,6 +310,15 @@ static inline void __kernel_param_unlock(void) #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ __module_param_call("", name, ¶m_ops_##type, &var, perm, -1, 0) + +/** + * core_param_unsafe - same as core_param but taints kernel + */ +#define core_param_unsafe(name, var, type, perm) \ + param_check_##type(name, &(var)); \ + __module_param_call("", name, ¶m_ops_##type, &var, perm, \ + -1, KERNEL_PARAM_FL_UNSAFE) + #endif /* !MODULE */ /** -- cgit v1.2.3 From faecbb45ebefb20260ad4a631e011e93c896cb73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 May 2015 13:42:25 +0200 Subject: Revert "netfilter: bridge: query conntrack about skb dnat" This reverts commit c055d5b03bb4cb69d349d787c9787c0383abd8b2. There are two issues: 'dnat_took_place' made me think that this is related to -j DNAT/MASQUERADE. But thats only one part of the story. This is also relevant for SNAT when we undo snat translation in reverse/reply direction. Furthermore, I originally wanted to do this mainly to avoid storing ipv6 addresses once we make DNAT/REDIRECT work for ipv6 on bridges. However, I forgot about SNPT/DNPT which is stateless. So we can't escape storing address for ipv6 anyway. Might as well do it for ipv4 too. Reported-and-tested-by: Bernhard Thaler Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..f15154a879c7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -176,6 +176,7 @@ struct nf_bridge_info { struct net_device *physindev; struct net_device *physoutdev; char neigh_header[8]; + __be32 ipv4_daddr; }; #endif -- cgit v1.2.3 From 985eba3aba77a997f65109e1418837b1d8b8512a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Dec 2014 16:46:14 +0100 Subject: mfd: syscon: Add Atmel MC (Memory Controller) registers definition The at91rm9200 SoC embeds a Memory Controller block which is used to configure several aspects of the platform: - AHB/APB Bus behavior - SDRAM Controller - EBI (External Bus Interface) and SMC (Static Memory Controller) config Those registers might be accessed by different drivers, hence we need to define it as a syscon device. Signed-off-by: Boris Brezillon Signed-off-by: Alexandre Belloni Acked-by: Lee Jones Acked-by: Nicolas Ferre --- include/linux/mfd/syscon/atmel-mc.h | 144 ++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/mfd/syscon/atmel-mc.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/atmel-mc.h b/include/linux/mfd/syscon/atmel-mc.h new file mode 100644 index 000000000000..afd9b8f1e363 --- /dev/null +++ b/include/linux/mfd/syscon/atmel-mc.h @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Memory Controllers (MC, EBI, SMC, SDRAMC, BFC) - System peripherals + * registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _LINUX_MFD_SYSCON_ATMEL_MC_H_ +#define _LINUX_MFD_SYSCON_ATMEL_MC_H_ + +/* Memory Controller */ +#define AT91_MC_RCR 0x00 +#define AT91_MC_RCB BIT(0) + +#define AT91_MC_ASR 0x04 +#define AT91_MC_UNADD BIT(0) +#define AT91_MC_MISADD BIT(1) +#define AT91_MC_ABTSZ GENMASK(9, 8) +#define AT91_MC_ABTSZ_BYTE (0 << 8) +#define AT91_MC_ABTSZ_HALFWORD (1 << 8) +#define AT91_MC_ABTSZ_WORD (2 << 8) +#define AT91_MC_ABTTYP GENMASK(11, 10) +#define AT91_MC_ABTTYP_DATAREAD (0 << 10) +#define AT91_MC_ABTTYP_DATAWRITE (1 << 10) +#define AT91_MC_ABTTYP_FETCH (2 << 10) +#define AT91_MC_MST(n) BIT(16 + (n)) +#define AT91_MC_SVMST(n) BIT(24 + (n)) + +#define AT91_MC_AASR 0x08 + +#define AT91_MC_MPR 0x0c +#define AT91_MPR_MSTP(n) GENMASK(2 + ((x) * 4), ((x) * 4)) + +/* External Bus Interface (EBI) registers */ +#define AT91_MC_EBI_CSA 0x60 +#define AT91_MC_EBI_CS(n) BIT(x) +#define AT91_MC_EBI_NUM_CS 8 + +#define AT91_MC_EBI_CFGR 0x64 +#define AT91_MC_EBI_DBPUC BIT(0) + +/* Static Memory Controller (SMC) registers */ +#define AT91_MC_SMC_CSR(n) (0x70 + ((n) * 4)) +#define AT91_MC_SMC_NWS GENMASK(6, 0) +#define AT91_MC_SMC_NWS_(x) ((x) << 0) +#define AT91_MC_SMC_WSEN BIT(7) +#define AT91_MC_SMC_TDF GENMASK(11, 8) +#define AT91_MC_SMC_TDF_(x) ((x) << 8) +#define AT91_MC_SMC_TDF_MAX 0xf +#define AT91_MC_SMC_BAT BIT(12) +#define AT91_MC_SMC_DBW GENMASK(14, 13) +#define AT91_MC_SMC_DBW_16 (1 << 13) +#define AT91_MC_SMC_DBW_8 (2 << 13) +#define AT91_MC_SMC_DPR BIT(15) +#define AT91_MC_SMC_ACSS GENMASK(17, 16) +#define AT91_MC_SMC_ACSS_(x) ((x) << 16) +#define AT91_MC_SMC_ACSS_MAX 3 +#define AT91_MC_SMC_RWSETUP GENMASK(26, 24) +#define AT91_MC_SMC_RWSETUP_(x) ((x) << 24) +#define AT91_MC_SMC_RWHOLD GENMASK(30, 28) +#define AT91_MC_SMC_RWHOLD_(x) ((x) << 28) +#define AT91_MC_SMC_RWHOLDSETUP_MAX 7 + +/* SDRAM Controller registers */ +#define AT91_MC_SDRAMC_MR 0x90 +#define AT91_MC_SDRAMC_MODE GENMASK(3, 0) +#define AT91_MC_SDRAMC_MODE_NORMAL (0 << 0) +#define AT91_MC_SDRAMC_MODE_NOP (1 << 0) +#define AT91_MC_SDRAMC_MODE_PRECHARGE (2 << 0) +#define AT91_MC_SDRAMC_MODE_LMR (3 << 0) +#define AT91_MC_SDRAMC_MODE_REFRESH (4 << 0) +#define AT91_MC_SDRAMC_DBW_16 BIT(4) + +#define AT91_MC_SDRAMC_TR 0x94 +#define AT91_MC_SDRAMC_COUNT GENMASK(11, 0) + +#define AT91_MC_SDRAMC_CR 0x98 +#define AT91_MC_SDRAMC_NC GENMASK(1, 0) +#define AT91_MC_SDRAMC_NC_8 (0 << 0) +#define AT91_MC_SDRAMC_NC_9 (1 << 0) +#define AT91_MC_SDRAMC_NC_10 (2 << 0) +#define AT91_MC_SDRAMC_NC_11 (3 << 0) +#define AT91_MC_SDRAMC_NR GENMASK(3, 2) +#define AT91_MC_SDRAMC_NR_11 (0 << 2) +#define AT91_MC_SDRAMC_NR_12 (1 << 2) +#define AT91_MC_SDRAMC_NR_13 (2 << 2) +#define AT91_MC_SDRAMC_NB BIT(4) +#define AT91_MC_SDRAMC_NB_2 (0 << 4) +#define AT91_MC_SDRAMC_NB_4 (1 << 4) +#define AT91_MC_SDRAMC_CAS GENMASK(6, 5) +#define AT91_MC_SDRAMC_CAS_2 (2 << 5) +#define AT91_MC_SDRAMC_TWR GENMASK(10, 7) +#define AT91_MC_SDRAMC_TRC GENMASK(14, 11) +#define AT91_MC_SDRAMC_TRP GENMASK(18, 15) +#define AT91_MC_SDRAMC_TRCD GENMASK(22, 19) +#define AT91_MC_SDRAMC_TRAS GENMASK(26, 23) +#define AT91_MC_SDRAMC_TXSR GENMASK(30, 27) + +#define AT91_MC_SDRAMC_SRR 0x9c +#define AT91_MC_SDRAMC_SRCB BIT(0) + +#define AT91_MC_SDRAMC_LPR 0xa0 +#define AT91_MC_SDRAMC_LPCB BIT(0) + +#define AT91_MC_SDRAMC_IER 0xa4 +#define AT91_MC_SDRAMC_IDR 0xa8 +#define AT91_MC_SDRAMC_IMR 0xac +#define AT91_MC_SDRAMC_ISR 0xb0 +#define AT91_MC_SDRAMC_RES BIT(0) + +/* Burst Flash Controller register */ +#define AT91_MC_BFC_MR 0xc0 +#define AT91_MC_BFC_BFCOM GENMASK(1, 0) +#define AT91_MC_BFC_BFCOM_DISABLED (0 << 0) +#define AT91_MC_BFC_BFCOM_ASYNC (1 << 0) +#define AT91_MC_BFC_BFCOM_BURST (2 << 0) +#define AT91_MC_BFC_BFCC GENMASK(3, 2) +#define AT91_MC_BFC_BFCC_MCK (1 << 2) +#define AT91_MC_BFC_BFCC_DIV2 (2 << 2) +#define AT91_MC_BFC_BFCC_DIV4 (3 << 2) +#define AT91_MC_BFC_AVL GENMASK(7, 4) +#define AT91_MC_BFC_PAGES GENMASK(10, 8) +#define AT91_MC_BFC_PAGES_NO_PAGE (0 << 8) +#define AT91_MC_BFC_PAGES_16 (1 << 8) +#define AT91_MC_BFC_PAGES_32 (2 << 8) +#define AT91_MC_BFC_PAGES_64 (3 << 8) +#define AT91_MC_BFC_PAGES_128 (4 << 8) +#define AT91_MC_BFC_PAGES_256 (5 << 8) +#define AT91_MC_BFC_PAGES_512 (6 << 8) +#define AT91_MC_BFC_PAGES_1024 (7 << 8) +#define AT91_MC_BFC_OEL GENMASK(13, 12) +#define AT91_MC_BFC_BAAEN BIT(16) +#define AT91_MC_BFC_BFOEH BIT(17) +#define AT91_MC_BFC_MUXEN BIT(18) +#define AT91_MC_BFC_RDYEN BIT(19) + +#endif /* _LINUX_MFD_SYSCON_ATMEL_MC_H_ */ -- cgit v1.2.3 From be32417796c2b8a83fe4cbece83bea96ab9e378f Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 6 May 2015 12:26:22 +0800 Subject: block: export blkdev_reread_part() and __blkdev_reread_part() This patch exports blkdev_reread_part() for block drivers, also introduce __blkdev_reread_part(). For some drivers, such as loop, reread of partitions can be run from the release path, and bd_mutex may already be held prior to calling ioctl_by_bdev(bdev, BLKRRPART, 0), so introduce __blkdev_reread_part for use in such cases. CC: Christoph Hellwig CC: Jens Axboe CC: Tejun Heo CC: Alexander Viro CC: Markus Pargmann CC: Stefan Weinhuber CC: Stefan Haberland CC: Sebastian Ott CC: Fabian Frederick CC: Ming Lei CC: David Herrmann CC: Andrew Morton CC: Peter Zijlstra CC: nbd-general@lists.sourceforge.net CC: linux-s390@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..1ef63900243c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); +extern int __blkdev_reread_part(struct block_device *bdev); +extern int blkdev_reread_part(struct block_device *bdev); + #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); extern void bd_unlink_disk_holder(struct block_device *bdev, -- cgit v1.2.3 From 6e844b035360294636271f4f0fd4a5a685e03c06 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 17 Apr 2015 10:45:55 -0700 Subject: ARM: BCM63xx: Add Broadcom BCM63xx PMB controller helpers This patch adds both common register definitions and helper functions used to issue read/write commands to the Broadcom BCM63xx PMB controller which is used to power on and release from reset internal on-chip peripherals such as the integrated Ethernet switch, AHCI, USB, as well as the secondary CPU core. This is going to be utilized by the BCM63138 SMP code, as well as by the BCM63138 reset controller later. Signed-off-by: Florian Fainelli --- include/linux/reset/bcm63xx_pmb.h | 88 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 include/linux/reset/bcm63xx_pmb.h (limited to 'include/linux') diff --git a/include/linux/reset/bcm63xx_pmb.h b/include/linux/reset/bcm63xx_pmb.h new file mode 100644 index 000000000000..bb4af7b5eb36 --- /dev/null +++ b/include/linux/reset/bcm63xx_pmb.h @@ -0,0 +1,88 @@ +/* + * Broadcom BCM63xx Processor Monitor Bus shared routines (SMP and reset) + * + * Copyright (C) 2015, Broadcom Corporation + * Author: Florian Fainelli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __BCM63XX_PMB_H +#define __BCM63XX_PMB_H + +#include +#include +#include +#include + +/* PMB Master controller register */ +#define PMB_CTRL 0x00 +#define PMC_PMBM_START (1 << 31) +#define PMC_PMBM_TIMEOUT (1 << 30) +#define PMC_PMBM_SLAVE_ERR (1 << 29) +#define PMC_PMBM_BUSY (1 << 28) +#define PMC_PMBM_READ (0 << 20) +#define PMC_PMBM_WRITE (1 << 20) +#define PMB_WR_DATA 0x04 +#define PMB_TIMEOUT 0x08 +#define PMB_RD_DATA 0x0C + +#define PMB_BUS_ID_SHIFT 8 + +/* Perform the low-level PMB master operation, shared between reads and + * writes. + */ +static inline int __bpcm_do_op(void __iomem *master, unsigned int addr, + u32 off, u32 op) +{ + unsigned int timeout = 1000; + u32 cmd; + + cmd = (PMC_PMBM_START | op | (addr & 0xff) << 12 | off); + writel(cmd, master + PMB_CTRL); + do { + cmd = readl(master + PMB_CTRL); + if (!(cmd & PMC_PMBM_START)) + return 0; + + if (cmd & PMC_PMBM_SLAVE_ERR) + return -EIO; + + if (cmd & PMC_PMBM_TIMEOUT) + return -ETIMEDOUT; + + udelay(1); + } while (timeout-- > 0); + + return -ETIMEDOUT; +} + +static inline int bpcm_rd(void __iomem *master, unsigned int addr, + u32 off, u32 *val) +{ + int ret = 0; + + ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_READ); + *val = readl(master + PMB_RD_DATA); + + return ret; +} + +static inline int bpcm_wr(void __iomem *master, unsigned int addr, + u32 off, u32 val) +{ + int ret = 0; + + writel(val, master + PMB_WR_DATA); + ret = __bpcm_do_op(master, addr, off >> 2, PMC_PMBM_WRITE); + + return ret; +} + +#endif /* __BCM63XX_PMB_H */ -- cgit v1.2.3 From 7f1a57fdd6cb6e7be2ed31878a34655df38e1861 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 19 May 2015 16:13:02 +0900 Subject: power_supply: Fix possible NULL pointer dereference on early uevent Don't call the power_supply_changed() from power_supply_register() when parent is still probing because it may lead to accessing parent too early. In bq27x00_battery this caused NULL pointer exception because uevent of power_supply_changed called back the the get_property() method provided by the driver. The get_property() method accessed pointer which should be returned by power_supply_register(). Starting from bq27x00_battery_probe(): di->bat = power_supply_register() power_supply_changed() kobject_uevent() power_supply_uevent() power_supply_show_property() power_supply_get_property() bq27x00_battery_get_property() dereference of di->bat which is NULL here The dereference of di->bat (value returned by power_supply_register()) is the currently visible problem. However calling back the methods provided by driver before ending the probe may lead to accessing other driver-related data which is not yet initialized. The call to power_supply_changed() is postponed till probing ends - mutex of parent device is released. Reported-by: H. Nikolaus Schaller Signed-off-by: Krzysztof Kozlowski Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core") Tested-By: Dr. H. Nikolaus Schaller Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 75a1dd8dc56e..a80f1fd01ddb 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -237,6 +237,7 @@ struct power_supply { /* private */ struct device dev; struct work_struct changed_work; + struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; atomic_t use_cnt; -- cgit v1.2.3 From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 May 2015 16:59:03 -0700 Subject: bpf: allow bpf programs to tail-call other bpf programs introduce bpf_tail_call(ctx, &jmp_table, index) helper function which can be used from BPF programs like: int bpf_prog(struct pt_regs *ctx) { ... bpf_tail_call(ctx, &jmp_table, index); ... } that is roughly equivalent to: int bpf_prog(struct pt_regs *ctx) { ... if (jmp_table[index]) return (*jmp_table[index])(ctx); ... } The important detail that it's not a normal call, but a tail call. The kernel stack is precious, so this helper reuses the current stack frame and jumps into another BPF program without adding extra call frame. It's trivially done in interpreter and a bit trickier in JITs. In case of x64 JIT the bigger part of generated assembler prologue is common for all programs, so it is simply skipped while jumping. Other JITs can do similar prologue-skipping optimization or do stack unwind before jumping into the next program. bpf_tail_call() arguments: ctx - context pointer jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table index - index in the jump table Since all BPF programs are idenitified by file descriptor, user space need to populate the jmp_table with FDs of other BPF programs. If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere and program execution continues as normal. New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can populate this jmp_table array with FDs of other bpf programs. Programs can share the same jmp_table array or use multiple jmp_tables. The chain of tail calls can form unpredictable dynamic loops therefore tail_call_cnt is used to limit the number of calls and currently is set to 32. Use cases: Acked-by: Daniel Borkmann ========== - simplify complex programs by splitting them into a sequence of small programs - dispatch routine For tracing and future seccomp the program may be triggered on all system calls, but processing of syscall arguments will be different. It's more efficient to implement them as: int syscall_entry(struct seccomp_data *ctx) { bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */); ... default: process unknown syscall ... } int sys_write_event(struct seccomp_data *ctx) {...} int sys_read_event(struct seccomp_data *ctx) {...} syscall_jmp_table[__NR_write] = sys_write_event; syscall_jmp_table[__NR_read] = sys_read_event; For networking the program may call into different parsers depending on packet format, like: int packet_parser(struct __sk_buff *skb) { ... parse L2, L3 here ... __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol)); bpf_tail_call(skb, &ipproto_jmp_table, ipproto); ... default: process unknown protocol ... } int parse_tcp(struct __sk_buff *skb) {...} int parse_udp(struct __sk_buff *skb) {...} ipproto_jmp_table[IPPROTO_TCP] = parse_tcp; ipproto_jmp_table[IPPROTO_UDP] = parse_udp; - for TC use case, bpf_tail_call() allows to implement reclassify-like logic - bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table are atomic, so user space can build chains of BPF programs on the fly Implementation details: ======================= - high performance of bpf_tail_call() is the goal. It could have been implemented without JIT changes as a wrapper on top of BPF_PROG_RUN() macro, but with two downsides: . all programs would have to pay performance penalty for this feature and tail call itself would be slower, since mandatory stack unwind, return, stack allocate would be done for every tailcall. . tailcall would be limited to programs running preempt_disabled, since generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would need to be either global per_cpu variable accessed by helper and by wrapper or global variable protected by locks. In this implementation x64 JIT bypasses stack unwind and jumps into the callee program after prologue. - bpf_prog_array_compatible() ensures that prog_type of callee and caller are the same and JITed/non-JITed flag is the same, since calling JITed program from non-JITed is invalid, since stack frames are different. Similarly calling kprobe type program from socket type program is invalid. - jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map' abstraction, its user space API and all of verifier logic. It's in the existing arraymap.c file, since several functions are shared with regular array map. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 22 ++++++++++++++++++++++ include/linux/filter.h | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5cda067115a..8821b9a8689e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -126,6 +126,27 @@ struct bpf_prog_aux { struct work_struct work; }; +struct bpf_array { + struct bpf_map map; + u32 elem_size; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same + * prog_type and JITed flag + */ + enum bpf_prog_type owner_prog_type; + bool owner_jited; + union { + char value[0] __aligned(8); + struct bpf_prog *prog[0] __aligned(8); + }; +}; +#define MAX_TAIL_CALL_CNT 32 + +u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +void bpf_prog_array_map_clear(struct bpf_map *map); +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); @@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_tail_call_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 200be4a74a33..17724f6ea983 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -void bpf_prog_select_runtime(struct bpf_prog *fp); +int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); -- cgit v1.2.3 From 37b1ef31a568fc02e53587620226e5f3c66454c8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 May 2015 14:41:19 +0800 Subject: workqueue: move flush_scheduled_work() to workqueue.h flush_scheduled_work() is just a simple call to flush_work(). Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4618dd672d1b..738b30b39b68 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -435,7 +435,6 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); -extern void flush_scheduled_work(void); extern int schedule_on_each_cpu(work_func_t func); @@ -531,6 +530,35 @@ static inline bool schedule_work(struct work_struct *work) return queue_work(system_wq, work); } +/** + * flush_scheduled_work - ensure that any scheduled work has run to completion. + * + * Forces execution of the kernel-global workqueue and blocks until its + * completion. + * + * Think twice before calling this function! It's very easy to get into + * trouble if you don't take great care. Either of the following situations + * will lead to deadlock: + * + * One of the work items currently on the workqueue needs to acquire + * a lock held by your code or its caller. + * + * Your code is running in the context of a work routine. + * + * They will be detected by lockdep when they occur, but the first might not + * occur very often. It depends on what work items are on the workqueue and + * what locks they need, which you have no control over. + * + * In most situations flushing the entire workqueue is overkill; you merely + * need to know that a particular work item isn't queued and isn't running. + * In such cases you should use cancel_delayed_work_sync() or + * cancel_work_sync() instead. + */ +static inline void flush_scheduled_work(void) +{ + flush_workqueue(system_wq); +} + /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use -- cgit v1.2.3 From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 May 2015 16:35:41 -0700 Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info This patch tracks the total number of inbound and outbound segments on a TCP socket. One may use this number to have an idea on connection quality when compared against the retransmissions. RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut These are a 32bit field each and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->segs_out was placed near tp->snd_nxt for good data locality and minimal performance impact, while tp->segs_in was placed near tp->bytes_received for the same reason. Join work with Eric Dumazet. Note that received SYN are accounted on the listener, but sent SYNACK are not accounted. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e6fb5df22db1..f0212026c77f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -149,11 +149,16 @@ struct tcp_sock { * sum(delta(rcv_nxt)), or how many bytes * were acked. */ + u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ - + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. -- cgit v1.2.3 From 2d0f230fe0649758394466cb69b553c0e8184df9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 May 2015 15:11:02 +0800 Subject: crypto: aead - Rename aead_alg to old_aead_alg This patch is the first step in the introduction of a new AEAD alg type. Unlike normal conversions this patch only renames the existing aead_alg structure because there are external references to it. Those references will be removed after this patch. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 59ca4086ce6a..7d290a91c6f9 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -268,7 +268,7 @@ struct ablkcipher_alg { }; /** - * struct aead_alg - AEAD cipher definition + * struct old_aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the @@ -293,7 +293,7 @@ struct ablkcipher_alg { * All fields except @givencrypt , @givdecrypt , @geniv and @ivsize are * mandatory and must be filled. */ -struct aead_alg { +struct old_aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); @@ -501,7 +501,7 @@ struct crypto_alg { union { struct ablkcipher_alg ablkcipher; - struct aead_alg aead; + struct old_aead_alg aead; struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct compress_alg compress; -- cgit v1.2.3 From 2a9de9c0f08d61fbe3764a21d22d0b72df97d6ae Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 24 Apr 2015 19:16:05 +0900 Subject: extcon: Use the unique id for external connector instead of string This patch uses the unique id to identify the type of external connector instead of string name. The string name have the many potential issues. So, this patch defines the 'extcon' enumeration which includes all supported external connector on EXTCON subsystem. If new external connector is necessary, the unique id of new connector have to be added in 'extcon' enumeration. There are current supported external connector in 'enum extcon' as following: enum extcon { EXTCON_NONE = 0x0, /* USB external connector */ EXTCON_USB = 0x1, EXTCON_USB_HOST = 0x2, /* Charger external connector */ EXTCON_TA = 0x10, EXTCON_FAST_CHARGER = 0x11, EXTCON_SLOW_CHARGER = 0x12, EXTCON_CHARGE_DOWNSTREAM = 0x13, /* Audio and video external connector */ EXTCON_LINE_IN = 0x20, EXTCON_LINE_OUT = 0x21, EXTCON_MICROPHONE = 0x22, EXTCON_HEADPHONE = 0x23, EXTCON_HDMI = 0x30, EXTCON_MHL = 0x31, EXTCON_DVI = 0x32, EXTCON_VGA = 0x33, EXTCON_SPDIF_IN = 0x34, EXTCON_SPDIF_OUT = 0x35, EXTCON_VIDEO_IN = 0x36, EXTCON_VIDEO_OUT = 0x37, /* Miscellaneous external connector */ EXTCON_DOCK = 0x50, EXTCON_JIG = 0x51, EXTCON_MECHANICAL = 0x52, EXTCON_END, }; For example in extcon-arizona.c: To use unique id removes the potential issue about handling the inconsistent name of external connector with string. - Previously, use the string to register the type of arizona jack connector static const char *arizona_cable[] = { "Mechanical", "Microphone", "Headphone", "Line-out", }; - Newly, use the unique id to register the type of arizona jack connector static const enum extcon arizona_cable[] = { EXTCON_MECHANICAL, EXTCON_MICROPHONE, EXTCON_HEADPHONE, EXTCON_LINE_OUT, EXTCON_NONE, }; And this patch modify the prototype of extcon_{get|set}_cable_state_() which uses the 'enum extcon id' instead of 'cable_index'. Because although one more extcon drivers support USB cable, each extcon driver might has the differnt 'cable_index' for USB cable. All extcon drivers can use the unique id number for same external connector with modified extcon_{get|set}_cable_state_(). - Previously, use 'cable_index' on these functions: extcon_get_cable_state_(struct extcon_dev*, int cable_index) extcon_set_cable_state_(struct extcon_dev*, int cable_index, bool state) -Newly, use 'enum extcon id' on these functions: extcon_get_cable_state_(struct extcon_dev*, enum extcon id) extcon_set_cable_state_(struct extcon_dev*, enum extcon id, bool state) Cc: Arnd Bergmann Cc: Felipe Balbi Signed-off-by: Chanwoo Choi Acked-by: Roger Quadros Acked-by: Charles Keepax Acked-by: Ramakrishna Pallala Reviewed-by: Krzysztof Kozlowski [arnd: Report the build break about drivers/usb/phy/phy-tahvo.c after using the unique id for external connector insteadf of string] Reported-by: Arnd Bergmann [dan.carpenter: Report the build warning of extcon_{set|get}_cable_state_()] Reported-by: Dan Carpenter --- include/linux/extcon.h | 111 ++++++++++++++------------------- include/linux/extcon/extcon-adc-jack.h | 5 +- 2 files changed, 50 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 799474d9dc48..85c882f0029e 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -1,6 +1,9 @@ /* * External connector (extcon) class driver * + * Copyright (C) 2015 Samsung Electronics + * Author: Chanwoo Choi + * * Copyright (C) 2012 Samsung Electronics * Author: Donggeun Kim * Author: MyungJoo Ham @@ -27,50 +30,41 @@ #include #include -#define SUPPORTED_CABLE_MAX 32 -#define CABLE_NAME_MAX 30 - -/* - * The standard cable name is to help support general notifier - * and notifiee device drivers to share the common names. - * Please use standard cable names unless your notifier device has - * a very unique and abnormal cable or - * the cable type is supposed to be used with only one unique - * pair of notifier/notifiee devices. - * - * Please add any other "standard" cables used with extcon dev. - * - * You may add a dot and number to specify version or specification - * of the specific cable if it is required. (e.g., "Fast-charger.18" - * and "Fast-charger.10" for 1.8A and 1.0A chargers) - * However, the notifiee and notifier should be able to handle such - * string and if the notifiee can negotiate the protocol or identify, - * you don't need such convention. This convention is helpful when - * notifier can distinguish but notifiee cannot. - */ -enum extcon_cable_name { - EXTCON_USB = 0, - EXTCON_USB_HOST, - EXTCON_TA, /* Travel Adaptor */ - EXTCON_FAST_CHARGER, - EXTCON_SLOW_CHARGER, - EXTCON_CHARGE_DOWNSTREAM, /* Charging an external device */ - EXTCON_HDMI, - EXTCON_MHL, - EXTCON_DVI, - EXTCON_VGA, - EXTCON_DOCK, - EXTCON_LINE_IN, - EXTCON_LINE_OUT, - EXTCON_MIC_IN, - EXTCON_HEADPHONE_OUT, - EXTCON_SPDIF_IN, - EXTCON_SPDIF_OUT, - EXTCON_VIDEO_IN, - EXTCON_VIDEO_OUT, - EXTCON_MECHANICAL, +enum extcon { + EXTCON_NONE = 0x0, + + /* USB external connector */ + EXTCON_USB = 0x1, + EXTCON_USB_HOST = 0x2, + + /* Charger external connector */ + EXTCON_TA = 0x10, + EXTCON_FAST_CHARGER = 0x11, + EXTCON_SLOW_CHARGER = 0x12, + EXTCON_CHARGE_DOWNSTREAM = 0x13, + + /* Audio/Video external connector */ + EXTCON_LINE_IN = 0x20, + EXTCON_LINE_OUT = 0x21, + EXTCON_MICROPHONE = 0x22, + EXTCON_HEADPHONE = 0x23, + + EXTCON_HDMI = 0x30, + EXTCON_MHL = 0x31, + EXTCON_DVI = 0x32, + EXTCON_VGA = 0x33, + EXTCON_SPDIF_IN = 0x34, + EXTCON_SPDIF_OUT = 0x35, + EXTCON_VIDEO_IN = 0x36, + EXTCON_VIDEO_OUT = 0x37, + + /* Etc external connector */ + EXTCON_DOCK = 0x50, + EXTCON_JIG = 0x51, + EXTCON_MECHANICAL = 0x52, + + EXTCON_END, }; -extern const char extcon_cable_name[][CABLE_NAME_MAX + 1]; struct extcon_cable; @@ -78,7 +72,7 @@ struct extcon_cable; * struct extcon_dev - An extcon device represents one external connector. * @name: The name of this extcon device. Parent device name is * used if NULL. - * @supported_cable: Array of supported cable names ending with NULL. + * @supported_cable: Array of supported cable names ending with EXTCON_NONE. * If supported_cable is NULL, cable name related APIs * are disabled. * @mutually_exclusive: Array of mutually exclusive set of cables that cannot @@ -113,7 +107,7 @@ struct extcon_cable; struct extcon_dev { /* Optional user initializing data */ const char *name; - const char **supported_cable; + const enum extcon *supported_cable; const u32 *mutually_exclusive; /* Optional callbacks to override class functions */ @@ -194,10 +188,10 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); /* * Following APIs control the memory of extcon device. */ -extern struct extcon_dev *extcon_dev_allocate(const char **cables); +extern struct extcon_dev *extcon_dev_allocate(const enum extcon *cable); extern void extcon_dev_free(struct extcon_dev *edev); extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const char **cables); + const enum extcon *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* @@ -216,13 +210,10 @@ extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state); /* * get/set_cable_state access each bit of the 32b encoded state value. - * They are used to access the status of each cable based on the cable_name - * or cable_index, which is retrieved by extcon_find_cable_index + * They are used to access the status of each cable based on the cable_name. */ -extern int extcon_find_cable_index(struct extcon_dev *sdev, - const char *cable_name); -extern int extcon_get_cable_state_(struct extcon_dev *edev, int cable_index); -extern int extcon_set_cable_state_(struct extcon_dev *edev, int cable_index, +extern int extcon_get_cable_state_(struct extcon_dev *edev, enum extcon id); +extern int extcon_set_cable_state_(struct extcon_dev *edev, enum extcon id, bool cable_state); extern int extcon_get_cable_state(struct extcon_dev *edev, @@ -281,7 +272,7 @@ static inline int devm_extcon_dev_register(struct device *dev, static inline void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev) { } -static inline struct extcon_dev *extcon_dev_allocate(const char **cables) +static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable) { return ERR_PTR(-ENOSYS); } @@ -289,7 +280,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const char **cables) static inline void extcon_dev_free(struct extcon_dev *edev) { } static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const char **cables) + const enum extcon *cable) { return ERR_PTR(-ENOSYS); } @@ -312,20 +303,14 @@ static inline int extcon_update_state(struct extcon_dev *edev, u32 mask, return 0; } -static inline int extcon_find_cable_index(struct extcon_dev *edev, - const char *cable_name) -{ - return 0; -} - static inline int extcon_get_cable_state_(struct extcon_dev *edev, - int cable_index) + enum extcon id) { return 0; } static inline int extcon_set_cable_state_(struct extcon_dev *edev, - int cable_index, bool cable_state) + enum extcon id, bool cable_state) { return 0; } diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h index 9ca958c4e94c..53c60806bcfb 100644 --- a/include/linux/extcon/extcon-adc-jack.h +++ b/include/linux/extcon/extcon-adc-jack.h @@ -44,7 +44,7 @@ struct adc_jack_cond { * @consumer_channel: Unique name to identify the channel on the consumer * side. This typically describes the channels used within * the consumer. E.g. 'battery_voltage' - * @cable_names: array of cable names ending with null. + * @cable_names: array of extcon id for supported cables. * @adc_contitions: array of struct adc_jack_cond conditions ending * with .state = 0 entry. This describes how to decode * adc values into extcon state. @@ -58,8 +58,7 @@ struct adc_jack_pdata { const char *name; const char *consumer_channel; - /* The last entry should be NULL */ - const char **cable_names; + const enum extcon *cable_names; /* The last entry's state should be 0 */ struct adc_jack_cond *adc_conditions; -- cgit v1.2.3 From 046050f6e623e442e9c71c525462ebd395dae526 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 19 May 2015 20:01:12 +0900 Subject: extcon: Update the prototype of extcon_register_notifier() with enum extcon Previously, extcon consumer driver used the extcon_register_interest() to register the notifier chain and then to receive the notifier event when external connector's state is changed. When registering the notifier chain for specific external connector with extcon_register_interest(), it used the the string name of external connector directly. There are potential problem because of unclear, non-standard and inconsequent cable name. Namely, it is not appropriate method to identify each external connector. So, this patch modify the prototype of extcon_register_notifier() by using the 'enum extcon' which are the unique id for each external connector instead of unclear string method. - Previously, the extcon consumer driver used the extcon_register_interest() with 'cable_name' to point out the specific external connector. Also. it used the un-needed structure (struct extcon_specific_cable_nb). : int extcon_register_interest(struct extcon_specific_cable_nb *obj, const char *extcon_name, const char *cable_name, struct notifier_block *nb) - Newly, the updated extcon_register_notifier() would definitely support the same feature to detech the changed state of external connector without any specific structure (struct extcon_specific_cable_nb). : int extcon_register_notifier(struct extcon_dev *edev, enum extcon id, struct notifier_block *nb) This patch support the both extcon_register_interest() and new extcon_register_ notifier(). But the extcon_{register|unregister}_interest() will be deprecated because extcon core would support the notifier event for extcon consumer driver with only updated extcon_register_notifier() and 'extcon_specific_cable_nb' will be removed if there are no extcon consumer driver with legacy extcon_{register|unregister}_interest(). Signed-off-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski --- include/linux/extcon.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 85c882f0029e..be9652b3a154 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -116,7 +116,7 @@ struct extcon_dev { /* Internal data. Please do not set. */ struct device dev; - struct raw_notifier_head nh; + struct raw_notifier_head *nh; struct list_head entry; int max_supported; spinlock_t lock; /* could be called by irq handler */ @@ -155,8 +155,6 @@ struct extcon_cable { /** * struct extcon_specific_cable_nb - An internal data for * extcon_register_interest(). - * @internal_nb: A notifier block bridging extcon notifier - * and cable notifier. * @user_nb: user provided notifier block for events from * a specific cable. * @cable_index: the target cable. @@ -164,7 +162,6 @@ struct extcon_cable { * @previous_value: the saved previous event value. */ struct extcon_specific_cable_nb { - struct notifier_block internal_nb; struct notifier_block *user_nb; int cable_index; struct extcon_dev *edev; @@ -240,10 +237,10 @@ extern int extcon_unregister_interest(struct extcon_specific_cable_nb *nb); * we do not recommend to use this for normal 'notifiee' device drivers who * want to be notified by a specific external port of the notifier. */ -extern int extcon_register_notifier(struct extcon_dev *edev, +extern int extcon_register_notifier(struct extcon_dev *edev, enum extcon id, + struct notifier_block *nb); +extern int extcon_unregister_notifier(struct extcon_dev *edev, enum extcon id, struct notifier_block *nb); -extern int extcon_unregister_notifier(struct extcon_dev *edev, - struct notifier_block *nb); /* * Following API get the extcon device from devicetree. @@ -333,13 +330,15 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) } static inline int extcon_register_notifier(struct extcon_dev *edev, - struct notifier_block *nb) + enum extcon id, + struct notifier_block *nb) { return 0; } static inline int extcon_unregister_notifier(struct extcon_dev *edev, - struct notifier_block *nb) + enum extcon id, + struct notifier_block *nb) { return 0; } -- cgit v1.2.3 From 668abc729fcb9d034eccadf63166d2c76cd645d1 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 21 May 2015 17:42:43 +0100 Subject: regmap: Introduce regmap_get_max_register This patch introduces regmap_get_max_register() function which would be used by the infrastructures like nvmem framework built on top of regmap. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown --- include/linux/regmap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 116655d92269..2d87deda79cd 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -433,6 +433,7 @@ int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change); int regmap_get_val_bytes(struct regmap *map); +int regmap_get_max_register(struct regmap *map); int regmap_async_complete(struct regmap *map); bool regmap_can_raw_write(struct regmap *map); @@ -676,6 +677,12 @@ static inline int regmap_get_val_bytes(struct regmap *map) return -EINVAL; } +static inline int regmap_get_max_register(struct regmap *map) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regcache_sync(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); -- cgit v1.2.3 From a2f776cbb8271d7149784207da0b0c51e8b1847c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 21 May 2015 17:42:54 +0100 Subject: regmap: Introduce regmap_get_reg_stride This patch introduces regmap_get_reg_stride() function which would be used by the infrastructures like nvmem framework built on top of regmap. Mostly this function would be used for sanity checks on inputs within such infrastructure. Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown --- include/linux/regmap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 2d87deda79cd..59c55ea0f0b5 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -434,6 +434,7 @@ int regmap_update_bits_check_async(struct regmap *map, unsigned int reg, bool *change); int regmap_get_val_bytes(struct regmap *map); int regmap_get_max_register(struct regmap *map); +int regmap_get_reg_stride(struct regmap *map); int regmap_async_complete(struct regmap *map); bool regmap_can_raw_write(struct regmap *map); @@ -683,6 +684,12 @@ static inline int regmap_get_max_register(struct regmap *map) return -EINVAL; } +static inline int regmap_get_reg_stride(struct regmap *map) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regcache_sync(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); -- cgit v1.2.3 From 69eb0980ab4ced06f7c2b4774575337ce32912fb Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Thu, 23 Apr 2015 16:10:24 +0530 Subject: regulator: max8973: add mechanism to enable/disable through GPIO MAX8973 supports the voltage output enable/disable through its EN pin. This EN pin can be connected through GPIO from host processor. Add support to provide GPIO number from platform/DT and if it is valid GPIO then enable external control default. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- include/linux/regulator/max8973-regulator.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index f8acc052e353..f6a8a16a0d4d 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -58,6 +58,9 @@ * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of * device register. + * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host + * then GPIO number can be provided. If no GPIO controlled then + * it should be -1. * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic. * @dvs_def_state: Default state of dvs. 1 if it is high else 0. */ @@ -65,6 +68,7 @@ struct max8973_regulator_platform_data { struct regulator_init_data *reg_init_data; unsigned long control_flags; bool enable_ext_control; + int enable_gpio; int dvs_gpio; unsigned dvs_def_state:1; }; -- cgit v1.2.3 From f705f837c58ebe1ea69dfffff4dcc234e2fbc8dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:38 +0200 Subject: nvme: consolidate synchronous command submission helpers Note that we keep the unused timeout argument, but allow callers to pass 0 instead of a timeout if they want the default. This will allow adding a timeout to the pass through path later on. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8dbd05e70f09..61488b2ae291 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -158,11 +158,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); -int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *, - struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); -int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, - u32 *result); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, dma_addr_t dma_addr); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, -- cgit v1.2.3 From e75ec752d725b7b612c0b2db1bca50a9e53c0879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:39 +0200 Subject: nvme: store a struct device pointer in struct nvme_dev Most users want the generic device, so store that in struct nvme_dev instead of the pci_dev. This also happens to be a nice step towards making some code reusable for non-PCI transports. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 61488b2ae291..de0e49a716b8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,7 +74,7 @@ struct nvme_dev { struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; - struct pci_dev *pci_dev; + struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; -- cgit v1.2.3 From d29ec8241c10eacf59c23b3828a88dbae06e7e3f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 11:12:46 +0200 Subject: nvme: submit internal commands through the block layer Use block layer queues with an internal cmd_type to submit internally generated NVMe commands. This both simplifies the code a lot and allow for a better structure. For example now the LighNVM code can construct commands without knowing the details of the underlying I/O descriptors. Or a future NVMe over network target could inject commands, as well as could the SCSI translation and ioctl code be reused for such a beast. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index de0e49a716b8..986bf8ad8e93 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -/** - * nvme_free_iod - frees an nvme_iod - * @dev: The device that the I/O was submitted to - * @iod: The memory to free - */ -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); - -int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t); -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length); -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod); -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd); -int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, - dma_addr_t dma_addr); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result); int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, -- cgit v1.2.3 From 326e1dbb57368087a36607aaebe9795b8d5453e5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 22 May 2015 09:14:03 -0400 Subject: block: remove management of bi_remaining when restoring original bi_end_io Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") regressed all existing callers that followed this pattern: 1) saving a bio's original bi_end_io 2) wiring up an intermediate bi_end_io 3) restoring the original bi_end_io from intermediate bi_end_io 4) calling bio_endio() to execute the restored original bi_end_io The regression was due to BIO_CHAIN only ever getting set if bio_inc_remaining() is called. For the above pattern it isn't set until step 3 above (step 2 would've needed to establish BIO_CHAIN). As such the first bio_endio(), in step 2 above, never decremented __bi_remaining before calling the intermediate bi_end_io -- leaving __bi_remaining with the value 1 instead of 0. When bio_inc_remaining() occurred during step 3 it brought it to a value of 2. When the second bio_endio() was called, in step 4 above, it should've called the original bi_end_io but it didn't because there was an extra reference that wasn't dropped (due to atomic operations being optimized away since BIO_CHAIN wasn't set upfront). Fix this issue by removing the __bi_remaining management complexity for all callers that use the above pattern -- bio_chain() is the only interface that _needs_ to be concerned with __bi_remaining. For the above pattern callers just expect the bi_end_io they set to get called! Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls that aren't associated with the bio_chain() interface. Also, the bio_inc_remaining() interface has been moved local to bio.c. Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7486ea103f6e..f0291cf64cc5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); -extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -658,17 +657,6 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } -/* - * Increment chain count for the bio. Make sure the CHAIN flag update - * is visible before the raised count. - */ -static inline void bio_inc_remaining(struct bio *bio) -{ - bio->bi_flags |= (1 << BIO_CHAIN); - smp_mb__before_atomic(); - atomic_inc(&bio->__bi_remaining); -} - /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. -- cgit v1.2.3 From 5f1b670d0bef508a5554d92525f5f6d00d640b38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 09:14:04 -0400 Subject: block, dm: don't copy bios for request clones Currently dm-multipath has to clone the bios for every request sent to the lower devices, which wastes cpu cycles and ties down memory. This patch instead adds a new REQ_CLONE flag that instructs req_bio_endio to not complete bios attached to a request, which we set on clone requests similar to bios in a flush sequence. With this change I/O errors on a path failure only get propagated to dm-multipath, which can then either resubmit the I/O or complete the bios on the original request. I've done some basic testing of this on a Linux target with ALUA support, and it survives path failures during I/O nicely. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3f4ded0b1a34..45a6be89957c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -192,6 +192,7 @@ enum rq_flag_bits { __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NO_TIMEOUT, /* requests may never expire */ + __REQ_CLONE, /* cloned bios */ __REQ_NR_BITS, /* stops here */ }; @@ -246,5 +247,6 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +#define REQ_CLONE (1ULL << __REQ_CLONE) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc917956a6d0..9ded80da2c16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -775,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); -extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data); -extern void blk_rq_unprep_clone(struct request *rq); +extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_delay_queue(struct request_queue *, unsigned long); -- cgit v1.2.3 From d0751b98dfa391f862e02dc36a233a54615e3f1d Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 21 May 2015 15:05:02 +0800 Subject: PCI: Add dev->has_secondary_link to track downstream PCIe links A PCIe Port is an interface to a Link. A Root Port is a PCI-PCI bridge in a Root Complex and has a Link on its secondary (downstream) side. For other Ports, the Link may be on either the upstream (closer to the Root Complex) or downstream side of the Port. The usual topology has a Root Port connected to an Upstream Port. We previously assumed this was the only possible topology, and that a Downstream Port's Link was always on its downstream side, like this: +---------------------+ +------+ | Downstream | | Root | | Upstream Port +--Link-- | Port +--Link--+ Port | +------+ | Downstream | | Port +--Link-- +---------------------+ But systems do exist (see URL below) where the Root Port is connected to a Downstream Port. In this case, a Downstream Port's Link may be on either the upstream or downstream side: +---------------------+ +------+ | Upstream | | Root | | Downstream Port +--Link-- | Port +--Link--+ Port | +------+ | Downstream | | Port +--Link-- +---------------------+ We can't use the Port type to determine which side the Link is on, so add a bit in struct pci_dev to keep track. A Root Port's Link is always on the Port's secondary side. A component (Endpoint or Port) on the other end of the Link obviously has the Link on its upstream side. If that component is a Port, it is part of a Switch or a Bridge. A Bridge has a PCI or PCI-X bus on its secondary side, not a Link. The internal bus of a Switch connects the Port to another Port whose Link is on the downstream side. [bhelgaas: changelog, comment, cache "type", use if/else] Link: http://lkml.kernel.org/r/54EB81B2.4050904@pobox.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=94361 Suggested-by: Bjorn Helgaas Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..1989f6dc9618 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -355,6 +355,7 @@ struct pci_dev { unsigned int broken_intx_masking:1; unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ unsigned int irq_managed:1; + unsigned int has_secondary_link:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From 6374f9124efea5fae9cba263108583c39e22f86b Mon Sep 17 00:00:00 2001 From: Harald Geyer Date: Tue, 7 Apr 2015 11:12:35 +0000 Subject: timekeeping: Provide new API to get the current time resolution This patch series introduces a new function u32 ktime_get_resolution_ns(void) which allows to clean up some driver code. In particular the IIO subsystem has a function to provide timestamps for events but no means to get their resolution. So currently the dht11 driver tries to guess the resolution in a rather messy and convoluted way. We can do much better with the new code. This API is not designed to be exposed to user space. This has been tested on i386, sunxi and mxs. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Harald Geyer [jstultz: Tweaked to make it build after upstream changes] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 99176af216af..9af5c1214682 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -163,6 +163,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); extern ktime_t ktime_get_raw(void); +extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format -- cgit v1.2.3 From 57d05a93ada77c4f8a6112cbc867a2948dce7991 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 13 May 2015 16:04:47 -0700 Subject: time: Rework debugging variables so they aren't global Ingo suggested that the timekeeping debugging variables recently added should not be global, and should be tied to the timekeeper's read_base. Thus this patch implements that suggestion. This version is different from the earlier versions as it keeps the variables in the timekeeper structure rather then in the tkr. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 6f8276ae579c..e1f5a1136554 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -61,6 +61,9 @@ struct tk_read_base { * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffie times) @@ -106,6 +109,18 @@ struct timekeeper { s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; +#ifdef CONFIG_DEBUG_TIMEKEEPING + long last_warning; + /* + * These simple flag variables are managed + * without locks, which is racy, but they are + * ok since we don't really care about being + * super precise about how many events were + * seen, just that a problem was observed. + */ + int underflow_seen; + int overflow_seen; +#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL -- cgit v1.2.3 From 30f3b3f9836c7c7e1f42ca855bbe8127fff4b99a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 9 Apr 2015 09:04:40 +0800 Subject: time: Include math64.h in time64.h On 32-bit systems, timespec64_add_ns() calls __iter_div_u64_rem() which needs math64.h, and we want to include time64.h in some cases. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Xunlei Pang Signed-off-by: John Stultz --- include/linux/time64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index a3831478d9cf..12d4e82b0276 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -2,6 +2,7 @@ #define _LINUX_TIME64_H #include +#include typedef __s64 time64_t; -- cgit v1.2.3 From e83d0a4106d81dd08b70318f078f3bad6acdc110 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 9 Apr 2015 09:04:42 +0800 Subject: time: Remove read_boot_clock() Now that we have a read_boot_clock64() function available on every architecture, and converted all the users to it, it's time to remove the (now unused) read_boot_clock() completely from the kernel. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Xunlei Pang [jstultz: Minor commit message tweak suggested by Ingo] Signed-off-by: John Stultz --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 9af5c1214682..3aa72e648650 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -267,7 +267,6 @@ extern int persistent_clock_is_local; extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); -extern void read_boot_clock(struct timespec *ts); extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); extern int update_persistent_clock64(struct timespec64 now); -- cgit v1.2.3 From d654976cbf852ee20612ee10dbe57cdacda9f452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 May 2015 21:51:19 -0700 Subject: tcp: fix a potential deadlock in tcp_get_info() Taking socket spinlock in tcp_get_info() can deadlock, as inet_diag_dump_icsk() holds the &hashinfo->ehash_locks[i], while packet processing can use the reverse locking order. We could avoid this locking for TCP_LISTEN states, but lockdep would certainly get confused as all TCP sockets share same lockdep classes. [ 523.722504] ====================================================== [ 523.728706] [ INFO: possible circular locking dependency detected ] [ 523.734990] 4.1.0-dbg-DEV #1676 Not tainted [ 523.739202] ------------------------------------------------------- [ 523.745474] ss/18032 is trying to acquire lock: [ 523.750002] (slock-AF_INET){+.-...}, at: [] tcp_get_info+0x2c4/0x360 [ 523.758129] [ 523.758129] but task is already holding lock: [ 523.763968] (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [] inet_diag_dump_icsk+0x1d5/0x6c0 [ 523.774661] [ 523.774661] which lock already depends on the new lock. [ 523.774661] [ 523.782850] [ 523.782850] the existing dependency chain (in reverse order) is: [ 523.790326] -> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}: [ 523.796599] [] lock_acquire+0xbb/0x270 [ 523.802565] [] _raw_spin_lock+0x38/0x50 [ 523.808628] [] __inet_hash_nolisten+0x78/0x110 [ 523.815273] [] tcp_v4_syn_recv_sock+0x24b/0x350 [ 523.822067] [] tcp_check_req+0x3c1/0x500 [ 523.828199] [] tcp_v4_do_rcv+0x239/0x3d0 [ 523.834331] [] tcp_v4_rcv+0xa8e/0xc10 [ 523.840202] [] ip_local_deliver_finish+0x133/0x3e0 [ 523.847214] [] ip_local_deliver+0xaa/0xc0 [ 523.853440] [] ip_rcv_finish+0x168/0x5c0 [ 523.859624] [] ip_rcv+0x307/0x420 Lets use u64_sync infrastructure instead. As a bonus, 64bit arches get optimized, as these are nop for them. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e8bbf403618f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,8 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ + u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ -- cgit v1.2.3 From 9d16f207112f77711600fb0770182a06e056e5de Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 18 May 2015 10:43:31 +0530 Subject: cpufreq: Track cpu managing sysfs kobjects separately In order to prepare for the next few commits, that will stop migrating sysfs files on cpu hotplug, this patch starts managing sysfs-cpu separately. The behavior is still the same as we are still migrating sysfs files on hotplug, later commits would change that. Signed-off-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 48e37c07eb84..29ad97c34fd5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,9 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ - unsigned int cpu; /* cpu nr of CPU managing this policy */ + unsigned int cpu; /* cpu managing this policy, must be online */ + unsigned int kobj_cpu; /* cpu managing sysfs files, can be offline */ + struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ -- cgit v1.2.3 From 0824965140fff1bf640a987dc790d1594a8e0699 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 13 Apr 2015 16:23:36 +0200 Subject: PCI: Propagate the "ignore hotplug" setting to parent Refine the mechanism introduced by commit f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") to propagate the ignore_hotplug setting of the device to its parent bridge in case hotplug notifications related to the graphics adapter switching are given for the bridge rather than for the device itself (they need to be ignored in both cases). Link: https://bugzilla.kernel.org/show_bug.cgi?id=61891 Link: https://bugs.freedesktop.org/show_bug.cgi?id=88927 Fixes: b440bde74f04 ("PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device") Reported-and-tested-by: tiagdtd-lava Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.17+ --- include/linux/pci.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..ef45ffe9ca88 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1006,6 +1006,7 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); +void pci_ignore_hotplug(struct pci_dev *dev); /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); @@ -1043,11 +1044,6 @@ bool pci_dev_run_wake(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); -static inline void pci_ignore_hotplug(struct pci_dev *dev) -{ - dev->ignore_hotplug = 1; -} - static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { -- cgit v1.2.3 From edd4ab0559316a1efe0881a4e2ccaeb4fec73142 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Sun, 24 May 2015 09:11:58 +0530 Subject: power: max17042_battery: add HEALTH and TEMP_* properties support This patch adds the support for following battery properties to max17042 fuel gauge driver. POWER_SUPPLY_PROP_TEMP_ALERT_MIN POWER_SUPPLY_PROP_TEMP_ALERT_MAX POWER_SUPPLY_PROP_TEMP_MIN POWER_SUPPLY_PROP_TEMP_MAX POWER_SUPPLY_PROP_HEALTH Signed-off-by: Ramakrishna Pallala Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/max17042_battery.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index cf112b4075c8..522757ac9cd4 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -215,6 +215,10 @@ struct max17042_platform_data { * the datasheet although it can be changed by board designers. */ unsigned int r_sns; + int vmin; /* in millivolts */ + int vmax; /* in millivolts */ + int temp_min; /* in tenths of degree Celsius */ + int temp_max; /* in tenths of degree Celsius */ }; #endif /* __MAX17042_BATTERY_H_ */ -- cgit v1.2.3 From 843735b788a4e49c453f4aefdae80e6dfbe9ee85 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Mon, 4 May 2015 22:16:07 +0530 Subject: power: axp288_charger: axp288 charger driver This patch adds new power supply charger driver support for X-Power AXP288 PMIC integrated charger. This driver interfaces with the axp20x mfd driver as a cell and listens to extcon cable events for setting up charging. Signed-off-by: Ramakrishna Pallala Acked-by: Lee Jones Signed-off-by: Sebastian Reichel --- include/linux/mfd/axp20x.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index dfabd6db7ddf..f9030df5acd1 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -275,4 +275,11 @@ struct axp20x_fg_pdata { int thermistor_curve[MAX_THERM_CURVE_SIZE][2]; }; +struct axp20x_chrg_pdata { + int max_cc; + int max_cv; + int def_cc; + int def_cv; +}; + #endif /* __LINUX_MFD_AXP20X_H */ -- cgit v1.2.3 From c93b76b34b4d8dbe8e3443eb27e49ac60034342b Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 7 May 2015 15:54:02 +0300 Subject: mei: bus: report also uuid in module alias In order to automate modules matching add device uuid which is reported in client enumeration, keep also the name that is needed in for nfc distinguishing radio vendor Report mei:name:uuid Cc: linux-api@vger.kernel.org Cc: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3bfd56778c29..2d2b2b571d61 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -599,9 +599,22 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define MEI_CL_NAME_SIZE 32 +#define MEI_CL_UUID_FMT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" +#define MEI_CL_UUID_ARGS(_u) \ + _u[0], _u[1], _u[2], _u[3], _u[4], _u[5], _u[6], _u[7], \ + _u[8], _u[9], _u[10], _u[11], _u[12], _u[13], _u[14], _u[15] +/** + * struct mei_cl_device_id - MEI client device identifier + * @name: helper name + * @uuid: client uuid + * @driver_info: information used by the driver. + * + * identifies mei client device by uuid and name + */ struct mei_cl_device_id { char name[MEI_CL_NAME_SIZE]; + __u8 uuid[16]; kernel_ulong_t driver_info; }; -- cgit v1.2.3 From dbac993f6a6df24d5edc362667e524ba43543472 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 7 May 2015 15:54:07 +0300 Subject: mei: export mei client device struct to external use Cc: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 0819d36a3a74..a16b1f9c1aca 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -7,6 +7,42 @@ struct mei_cl_device; +typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, + u32 events, void *context); + +/** + * struct mei_cl_device - MEI device handle + * An mei_cl_device pointer is returned from mei_add_device() + * and links MEI bus clients to their actual ME host client pointer. + * Drivers for MEI devices will get an mei_cl_device pointer + * when being probed and shall use it for doing ME bus I/O. + * + * @dev: linux driver model device pointer + * @me_cl: me client + * @cl: mei client + * @name: device name + * @event_work: async work to execute event callback + * @event_cb: Drivers register this callback to get asynchronous ME + * events (e.g. Rx buffer pending) notifications. + * @event_context: event callback run context + * @events: Events bitmask sent to the driver. + * @priv_data: client private data + */ +struct mei_cl_device { + struct device dev; + + struct mei_me_client *me_cl; + struct mei_cl *cl; + char name[MEI_CL_NAME_SIZE]; + + struct work_struct event_work; + mei_cl_event_cb_t event_cb; + void *event_context; + unsigned long events; + + void *priv_data; +}; + struct mei_cl_driver { struct device_driver driver; const char *name; @@ -28,8 +64,6 @@ void mei_cl_driver_unregister(struct mei_cl_driver *driver); ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, - u32 events, void *context); int mei_cl_register_event_cb(struct mei_cl_device *device, mei_cl_event_cb_t read_cb, void *context); -- cgit v1.2.3 From 7df20f2d893db42eaa1ea1e30a2573c971ec9238 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Wed, 29 Apr 2015 05:32:28 -0700 Subject: misc: mic: SCIF header file and IOCTL interface This patch introduces the SCIF documentation in the header file and describes the IOCTL interface for user mode. mic_overview.txt is updated with documentation on SCIF and a new document describing SCIF in more details is available in scif_overview.txt. Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- include/linux/scif.h | 993 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 993 insertions(+) create mode 100644 include/linux/scif.h (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h new file mode 100644 index 000000000000..44f4f3898bbe --- /dev/null +++ b/include/linux/scif.h @@ -0,0 +1,993 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef __SCIF_H__ +#define __SCIF_H__ + +#include +#include +#include + +#define SCIF_ACCEPT_SYNC 1 +#define SCIF_SEND_BLOCK 1 +#define SCIF_RECV_BLOCK 1 + +enum { + SCIF_PROT_READ = (1 << 0), + SCIF_PROT_WRITE = (1 << 1) +}; + +enum { + SCIF_MAP_FIXED = 0x10, + SCIF_MAP_KERNEL = 0x20, +}; + +enum { + SCIF_FENCE_INIT_SELF = (1 << 0), + SCIF_FENCE_INIT_PEER = (1 << 1), + SCIF_SIGNAL_LOCAL = (1 << 4), + SCIF_SIGNAL_REMOTE = (1 << 5) +}; + +enum { + SCIF_RMA_USECPU = (1 << 0), + SCIF_RMA_USECACHE = (1 << 1), + SCIF_RMA_SYNC = (1 << 2), + SCIF_RMA_ORDERED = (1 << 3) +}; + +/* End of SCIF Admin Reserved Ports */ +#define SCIF_ADMIN_PORT_END 1024 + +/* End of SCIF Reserved Ports */ +#define SCIF_PORT_RSVD 1088 + +typedef struct scif_endpt *scif_epd_t; + +#define SCIF_OPEN_FAILED ((scif_epd_t)-1) +#define SCIF_REGISTER_FAILED ((off_t)-1) +#define SCIF_MMAP_FAILED ((void *)-1) + +/** + * scif_open() - Create an endpoint + * + * Return: + * Upon successful completion, scif_open() returns an endpoint descriptor to + * be used in subsequent SCIF functions calls to refer to that endpoint; + * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is + * returned and errno is set to indicate the error; in kernel mode a NULL + * scif_epd_t is returned. + * + * Errors: + * ENOMEM - Insufficient kernel memory was available + */ +scif_epd_t scif_open(void); + +/** + * scif_bind() - Bind an endpoint to a port + * @epd: endpoint descriptor + * @pn: port number + * + * scif_bind() binds endpoint epd to port pn, where pn is a port number on the + * local node. If pn is zero, a port number greater than or equal to + * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to + * exactly one local port. Ports less than 1024 when requested can only be bound + * by system (or root) processes or by processes executed by privileged users. + * + * Return: + * Upon successful completion, scif_bind() returns the port number to which epd + * is bound; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint or the port is already bound + * EISCONN - The endpoint is already connected + * ENOSPC - No port number available for assignment + * EACCES - The port requested is protected and the user is not the superuser + */ +int scif_bind(scif_epd_t epd, u16 pn); + +/** + * scif_listen() - Listen for connections on an endpoint + * @epd: endpoint descriptor + * @backlog: maximum pending connection requests + * + * scif_listen() marks the endpoint epd as a listening endpoint - that is, as + * an endpoint that will be used to accept incoming connection requests. Once + * so marked, the endpoint is said to be in the listening state and may not be + * used as the endpoint of a connection. + * + * The endpoint, epd, must have been bound to a port. + * + * The backlog argument defines the maximum length to which the queue of + * pending connections for epd may grow. If a connection request arrives when + * the queue is full, the client may receive an error with an indication that + * the connection was refused. + * + * Return: + * Upon successful completion, scif_listen() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint is not bound to a port + * EISCONN - The endpoint is already connected or listening + */ +int scif_listen(scif_epd_t epd, int backlog); + +/** + * scif_connect() - Initiate a connection on a port + * @epd: endpoint descriptor + * @dst: global id of port to which to connect + * + * The scif_connect() function requests the connection of endpoint epd to remote + * port dst. If the connection is successful, a peer endpoint, bound to dst, is + * created on node dst.node. On successful return, the connection is complete. + * + * If the endpoint epd has not already been bound to a port, scif_connect() + * will bind it to an unused local port. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * In user space, scif_connect() supports an asynchronous connection mode + * if the application has set the O_NONBLOCK flag on the endpoint via the + * fcntl() system call. Setting this flag will result in the calling process + * not to wait during scif_connect(). + * + * Return: + * Upon successful completion, scif_connect() returns the port ID to which the + * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is + * set to indicate the error; in kernel mode the negative of one of the + * following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNREFUSED - The destination was not listening for connections or refused + * the connection request + * EINVAL - dst.port is not a valid port ID + * EISCONN - The endpoint is already connected + * ENOMEM - No buffer space is available + * ENODEV - The destination node does not exist, or the node is lost or existed, + * but is not currently in the network since it may have crashed + * ENOSPC - No port number available for assignment + * EOPNOTSUPP - The endpoint is listening and cannot be connected + */ +int scif_connect(scif_epd_t epd, struct scif_port_id *dst); + +/** + * scif_accept() - Accept a connection on an endpoint + * @epd: endpoint descriptor + * @peer: global id of port to which connected + * @newepd: new connected endpoint descriptor + * @flags: flags + * + * The scif_accept() call extracts the first connection request from the queue + * of pending connections for the port on which epd is listening. scif_accept() + * creates a new endpoint, bound to the same port as epd, and allocates a new + * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new + * endpoint is connected to the endpoint through which the connection was + * requested. epd is unaffected by this call, and remains in the listening + * state. + * + * On successful return, peer holds the global port identifier (node id and + * local port number) of the port which requested the connection. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * The number of connections that can (subsequently) be accepted on epd is only + * limited by system resources (memory). + * + * The flags argument is formed by OR'ing together zero or more of the + * following values. + * SCIF_ACCEPT_SYNC - block until a connection request is presented. If + * SCIF_ACCEPT_SYNC is not in flags, and no pending + * connections are present on the queue, scif_accept() + * fails with an EAGAIN error + * + * In user mode, the select() and poll() functions can be used to determine + * when there is a connection request. In kernel mode, the scif_poll() + * function may be used for this purpose. A readable event will be delivered + * when a connection is requested. + * + * Return: + * Upon successful completion, scif_accept() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be + * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete + * its connection request + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINTR - Interrupted function + * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is + * NULL, or newepd is NULL + * ENODEV - The requesting node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOENT - Secondary part of epd registration failed + */ +int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t + *newepd, int flags); + +/** + * scif_close() - Close an endpoint + * @epd: endpoint descriptor + * + * scif_close() closes an endpoint and performs necessary teardown of + * facilities associated with that endpoint. + * + * If epd is a listening endpoint then it will no longer accept connection + * requests on the port to which it is bound. Any pending connection requests + * are rejected. + * + * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs + * which are in-process through epd or its peer endpoint will complete before + * scif_close() returns. Registered windows of the local and peer endpoints are + * released as if scif_unregister() was called against each window. + * + * Closing a SCIF endpoint does not affect local registered memory mapped by + * a SCIF endpoint on a remote node. The local memory remains mapped by the peer + * SCIF endpoint explicitly removed by calling munmap(..) by the peer. + * + * If the peer endpoint's receive queue is not empty at the time that epd is + * closed, then the peer endpoint can be passed as the endpoint parameter to + * scif_recv() until the receive queue is empty. + * + * epd is freed and may no longer be accessed. + * + * Return: + * Upon successful completion, scif_close() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + */ +int scif_close(scif_epd_t epd); + +/** + * scif_send() - Send a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message length + * @flags: blocking mode flags + * + * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data + * are copied from memory starting at address msg. On successful execution the + * return value of scif_send() is the number of bytes that were sent, and is + * zero if no bytes were sent because len was zero. scif_send() may be called + * only when the endpoint is in a connected state. + * + * If a scif_send() call is non-blocking, then it sends only those bytes which + * can be sent without waiting, up to a maximum of len bytes. + * + * If a scif_send() call is blocking, then it normally returns after sending + * all len bytes. If a blocking call is interrupted or the connection is + * reset, the call is considered successful if some bytes were sent or len is + * zero, otherwise the call is considered unsuccessful. + * + * In user mode, the select() and poll() functions can be used to determine + * when the send queue is not full. In kernel mode, the scif_poll() function + * may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK + * is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_send() returns the number of bytes sent; + * otherwise in user mode -1 is returned and errno is set to indicate the + * error; in kernel mode the negative of one of the following errors is + * returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_recv() - Receive a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message buffer length + * @flags: blocking mode flags + * + * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of + * data are copied to memory starting at address msg. On successful execution + * the return value of scif_recv() is the number of bytes that were received, + * and is zero if no bytes were received because len was zero. scif_recv() may + * be called only when the endpoint is in a connected state. + * + * If a scif_recv() call is non-blocking, then it receives only those bytes + * which can be received without waiting, up to a maximum of len bytes. + * + * If a scif_recv() call is blocking, then it normally returns after receiving + * all len bytes. If the blocking call was interrupted due to a disconnection, + * subsequent calls to scif_recv() will copy all bytes received upto the point + * of disconnection. + * + * In user mode, the select() and poll() functions can be used to determine + * when data is available to be received. In kernel mode, the scif_poll() + * function may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_recv() will block until the entire message is received if + * SCIF_RECV_BLOCK is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_recv() returns the number of bytes + * received; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + * Errors: + * EAGAIN - The destination node is returning from a low power state + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_register() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @addr: starting virtual address + * @len: length of range + * @offset: offset of window + * @prot_flags: read/write protection flags + * @map_flags: mapping flags + * + * The scif_register() function opens a window, a range of whole pages of the + * registered address space of the endpoint epd, starting at offset po and + * continuing for len bytes. The value of po, further described below, is a + * function of the parameters offset and len, and the value of map_flags. Each + * page of the window represents the physical memory page which backs the + * corresponding page of the range of virtual address pages starting at addr + * and continuing for len bytes. addr and len are constrained to be multiples + * of the page size. A successful scif_register() call returns po. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register() will not replace any existing + * registration; an error is returned if any page within the range [offset, + * offset + len - 1] intersects an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po value so chosen will + * be an area of the registered address space that the implementation deems + * suitable for a mapping of len bytes. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to mmap(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap()) have no effect on + * existing window. + * + * If the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values. + * SCIF_PROT_READ - allow read operations from the window + * SCIF_PROT_WRITE - allow write operations to the window + * + * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a + * fixed offset. + * + * Return: + * Upon successful completion, scif_register() returns the offset at which the + * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that + * is (off_t *)-1) is returned and errno is set to indicate the error; in + * kernel mode the negative of one of the following errors is returned. + * + * Errors: + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range + * [offset, offset + len -1] are already registered + * EAGAIN - The mapping could not be performed due to lack of resources + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is + * set in flags, and offset is not a multiple of the page size, or addr is not a + * multiple of the page size, or len is not a multiple of the page size, or is + * 0, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN -The endpoint is not connected + */ +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot_flags, int map_flags); + +/** + * scif_unregister() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @offset: start of range to unregister + * @len: length of range to unregister + * + * The scif_unregister() function closes those previously registered windows + * which are entirely within the range [offset, offset + len - 1]. It is an + * error to specify a range which intersects only a subrange of a window. + * + * On a successful return, pages within the window may no longer be specified + * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), + * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, + * however, continues to exist until all previous references against it are + * removed. A window is referenced if there is a mapping to it created by + * mmap(), or if scif_get_pages() was called against the window + * (and the pages have not been returned via scif_put_pages()). A window is + * also referenced while an RMA, in which some range of the window is a source + * or destination, is in progress. Finally a window is referenced while some + * offset in that window was specified to scif_fence_signal(), and the RMAs + * marked by that call to scif_fence_signal() have not completed. While a + * window is in this state, its registered address space pages are not + * available for use in a new registered window. + * + * When all such references to the window have been removed, its references to + * all the physical pages which it represents are removed. Similarly, the + * registered address space pages of the window become available for + * registration in a new window. + * + * Return: + * Upon successful completion, scif_unregister() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. In the event of an + * error, no windows are unregistered. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a + * window, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_unregister(scif_epd_t epd, off_t offset, size_t len); + +/** + * scif_readfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space to + * which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_readfrom() copies len bytes from the remote registered address space of + * the peer of endpoint epd, starting at the offset roffset to the local + * registered address space of epd, starting at the offset loffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if loffset and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_readfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or, The range [roffset, roffset + len - 1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_writeto() - Copy to a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space + * from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_writeto() copies len bytes from the local registered address space of + * epd, starting at the offset loffset to the remote registered address space + * of the peer of endpoint epd, starting at the offset roffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if loffset and roffset are not separated by a multiple + * of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_readfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or, The range [roffset , roffset + len -1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_vreadfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @addr: address to which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_vreadfrom() copies len bytes from the remote registered address + * space of the peer of endpoint epd, starting at the offset roffset, to local + * memory, starting at addr. + * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may + * intersect multiple registered windows, but only if those windows are + * contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if addr and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - enable registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_vwriteto() - Copy to a remote address space + * @epd: endpoint descriptor + * @addr: address from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_vwriteto() copies len bytes from the local memory, starting at addr, to + * the remote registered address space of the peer of endpoint epd, starting at + * the offset roffset. + * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may intersect + * multiple registered windows, but only if those windows are contiguous in the + * registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and offset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and offset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if addr and offset are not separated by a multiple of + * 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - allow registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vwriteto() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_fence_mark() - Mark previously issued RMAs + * @epd: endpoint descriptor + * @flags: control flags + * @mark: marked value returned as output. + * + * scif_fence_mark() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are + * marked with a value returned at mark. The application may subsequently call + * scif_fence_wait(), passing the value returned at mark, to await completion + * of all RMAs so marked. + * + * The flags argument has exactly one of the following values. + * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * + * Return: + * Upon successful completion, scif_fence_mark() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_mark(scif_epd_t epd, int flags, int *mark); + +/** + * scif_fence_wait() - Wait for completion of marked RMAs + * @epd: endpoint descriptor + * @mark: mark request + * + * scif_fence_wait() returns after all RMAs marked with mark have completed. + * The value passed in mark must have been obtained in a previous call to + * scif_fence_mark(). + * + * Return: + * Upon successful completion, scif_fence_wait() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_wait(scif_epd_t epd, int mark); + +/** + * scif_fence_signal() - Request a memory update on completion of RMAs + * @epd: endpoint descriptor + * @loff: local offset + * @lval: local value to write to loffset + * @roff: remote offset + * @rval: remote value to write to roffset + * @flags: flags + * + * scif_fence_signal() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or marking the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. + * + * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the + * marked set, lval is written to memory at the address corresponding to offset + * loff in the local registered address space of epd. loff must be within a + * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion + * of the RMAs in the marked set, rval is written to memory at the address + * corresponding to offset roff in the remote registered address space of epd. + * roff must be within a remote registered window of the peer of epd. Note + * that any specified offset must be DWORD (4 byte / 32 bit) aligned. + * + * The flags argument is formed by OR'ing together the following. + * Exactly one of the following values. + * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * One or more of the following values. + * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to + * memory at the address corresponding to offset loff in the local + * registered address space of epd. + * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to + * memory at the address corresponding to offset roff in the remote + * registered address space of epd. + * + * Return: + * Upon successful completion, scif_fence_signal() returns 0; otherwise in + * user mode -1 is returned and errno is set to indicate the error; in kernel + * mode the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid, or loff or roff are not DWORD aligned + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - loff is invalid for the registered address of epd, or roff is invalid + * for the registered address space, of the peer of epd + */ +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, + u64 rval, int flags); + +/** + * scif_get_node_ids() - Return information about online nodes + * @nodes: array in which to return online node IDs + * @len: number of entries in the nodes array + * @self: address to place the node ID of the local node + * + * scif_get_node_ids() fills in the nodes array with up to len node IDs of the + * nodes in the SCIF network. If there is not enough space in nodes, as + * indicated by the len parameter, only len node IDs are returned in nodes. The + * return value of scif_get_node_ids() is the total number of nodes currently in + * the SCIF network. By checking the return value against the len parameter, + * the user may determine if enough space for nodes was allocated. + * + * The node ID of the local node is returned at self. + * + * Return: + * Upon successful completion, scif_get_node_ids() returns the actual number of + * online nodes in the SCIF network including 'self'; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode no + * errors are returned. + * + * Errors: + * EFAULT - Bad address + */ +int scif_get_node_ids(u16 *nodes, int len, u16 *self); + +#endif /* __SCIF_H__ */ -- cgit v1.2.3 From 3647a83d9dcf00b8e17777ec8aa1e48f1ed4fe06 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 11 Apr 2015 18:07:39 -0700 Subject: Drivers: hv: util: move kvp/vss function declarations to hyperv_vmbus.h These declarations are internal to hv_util module and hv_fcopy_* declarations already reside there. Signed-off-by: Vitaly Kuznetsov Tested-by: Alex Ng Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 902c37aef67e..1744148a39f9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1236,13 +1236,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, struct icmsg_negotiate *, u8 *, int, int); -int hv_kvp_init(struct hv_util_service *); -void hv_kvp_deinit(void); -void hv_kvp_onchannelcallback(void *); - -int hv_vss_init(struct hv_util_service *); -void hv_vss_deinit(void); -void hv_vss_onchannelcallback(void *); void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); extern struct resource hyperv_mmio; -- cgit v1.2.3 From db9ba2088f6507fee370904f02db1eb9b49bd088 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 22 Apr 2015 21:31:31 -0700 Subject: drivers: hv: vmbus: Get rid of some unused definitions Get rid of some unused definitions. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 1744148a39f9..e29ccddc6300 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -389,10 +389,6 @@ enum vmbus_channel_message_type { CHANNELMSG_INITIATE_CONTACT = 14, CHANNELMSG_VERSION_RESPONSE = 15, CHANNELMSG_UNLOAD = 16, -#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD - CHANNELMSG_VIEWRANGE_ADD = 17, - CHANNELMSG_VIEWRANGE_REMOVE = 18, -#endif CHANNELMSG_COUNT }; @@ -549,21 +545,6 @@ struct vmbus_channel_gpadl_torndown { u32 gpadl; } __packed; -#ifdef VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD -struct vmbus_channel_view_range_add { - struct vmbus_channel_message_header header; - PHYSICAL_ADDRESS viewrange_base; - u64 viewrange_length; - u32 child_relid; -} __packed; - -struct vmbus_channel_view_range_remove { - struct vmbus_channel_message_header header; - PHYSICAL_ADDRESS viewrange_base; - u32 child_relid; -} __packed; -#endif - struct vmbus_channel_relid_released { struct vmbus_channel_message_header header; u32 child_relid; -- cgit v1.2.3 From 2db84eff127e3f4b3635edc589cd6a56db8755a3 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 22 Apr 2015 21:31:32 -0700 Subject: Drivers: hv: vmbus: Implement the protocol for tearing down vmbus state Implement the protocol for tearing down the monitor state established with the host. Signed-off-by: K. Y. Srinivasan Tested-by: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e29ccddc6300..ea934864293d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -389,6 +389,7 @@ enum vmbus_channel_message_type { CHANNELMSG_INITIATE_CONTACT = 14, CHANNELMSG_VERSION_RESPONSE = 15, CHANNELMSG_UNLOAD = 16, + CHANNELMSG_UNLOAD_RESPONSE = 17, CHANNELMSG_COUNT }; -- cgit v1.2.3 From fea844a2b0edd6540d5cde2cd54a8a3c86e9c53f Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <[mailto:vkuznets@redhat.com]> Date: Wed, 6 May 2015 17:47:43 -0700 Subject: Drivers: hv: vmbus: briefly comment num_sc and next_oc next_oc and num_sc fields of struct vmbus_channel deserve a description. Move them closer to sc_list as these fields are related to it. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ea934864293d..3932a993ff5a 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -726,6 +726,15 @@ struct vmbus_channel { * All Sub-channels of a primary channel are linked here. */ struct list_head sc_list; + /* + * Current number of sub-channels. + */ + int num_sc; + /* + * Number of a sub-channel (position within sc_list) which is supposed + * to be used as the next outgoing channel. + */ + int next_oc; /* * The primary channel this sub-channel belongs to. * This will be NULL for the primary channel. @@ -740,9 +749,6 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; - - int num_sc; - int next_oc; }; static inline void set_channel_read_state(struct vmbus_channel *c, bool state) -- cgit v1.2.3 From 80c6e1465948c2e91214f01764f427d31ebedb26 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 21 May 2015 15:49:37 -0700 Subject: driver-core: fix build for !CONFIG_MODULES Commit f2411da74698 ("driver-core: add driver module asynchronous probe support") broke build in case modules are disabled, because in this case "struct module" is not defined and we can't dereference it. Let's define module_requested_async_probing() helper and stub it out if modules are disabled. Reported-by: kbuild test robot Reported-by: Stephen Rothwell Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/module.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index f46a47d3c0dc..57f5c0a804c0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -510,6 +510,11 @@ int unregister_module_notifier(struct notifier_block *nb); extern void print_modules(void); +static inline bool module_requested_async_probing(struct module *module) +{ + return module && module->async_probe_requested; +} + #else /* !CONFIG_MODULES... */ /* Given an address, look for it in the exception tables. */ @@ -620,6 +625,12 @@ static inline int unregister_module_notifier(struct notifier_block *nb) static inline void print_modules(void) { } + +static inline bool module_requested_async_probing(struct module *module) +{ + return false; +} + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 2539b258ec028351af954c169ea1b0ff72023a9f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 May 2015 14:45:34 +0100 Subject: drivers/base: cacheinfo: fix annoying typo when DT nodes are absent s/hierarcy/hierarchy/ Maybe the typo will annoy people enough so that they add the missing nodes to their device-tree files, but I still think this is better off fixed. Signed-off-by: Will Deacon Acked-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- include/linux/cacheinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 3daf5ed392c9..2189935075b4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -19,7 +19,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node * @type: type of the cache - data, inst or unified - * @level: represents the hierarcy in the multi-level cache + * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing * the minimum amount of data that gets transferred from memory * @number_of_sets: total number of sets, a set is a collection of cache -- cgit v1.2.3 From f4445f8b204de44a8baa4326b0e56537be867427 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 14 May 2015 15:28:24 +0100 Subject: drivers: of/base: move of_init to driver_init Commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") adds the symlink `of_node` for each device pointing to it's device tree node while creating/initialising it. However the devicetree sysfs is created and setup in of_init which is executed at core_initcall level. For all the devices created before of_init, the following error is thrown: "Error -2(-ENOENT) creating of_node link" Like many other components in driver model, initialize the sysfs support for OF/devicetree from driver_init so that it's ready before any devices are created. Fixes: 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") Suggested-by: Rob Herring Cc: Grant Likely Cc: Pawel Moll Cc: Benjamin Herrenschmidt Signed-off-by: Sudeep Holla Tested-by: Robert Schwebel Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..b871ff9d81d7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -121,6 +121,8 @@ extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF +void of_core_init(void); + static inline bool is_of_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_OF; @@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ +static inline void of_core_init(void) +{ +} + static inline bool is_of_node(struct fwnode_handle *fwnode) { return false; -- cgit v1.2.3 From be12a1fe298e8be04d5215364f94654dff81b0bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 16:59:58 +0200 Subject: net: skbuff: add skb_append_pagefrags and use it Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b617095adb88..f708936cdd23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -861,6 +861,9 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length); +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size); + struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; -- cgit v1.2.3 From a60e3cc7c92973a31fad0fd04dc5cf4355d3d1ef Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 17:00:00 +0200 Subject: net: make skb_splice_bits more configureable Prepare skb_splice_bits to be able to deal with AF_UNIX sockets. AF_UNIX sockets don't use lock_sock/release_sock and thus we have to use a callback to make the locking and unlocking configureable. Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f708936cdd23..6b41c15efa27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -35,6 +35,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -2699,9 +2700,15 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, __wsum csum); -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, - unsigned int flags); + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, -- cgit v1.2.3 From cc4a84c3da6f9dd6a297dc81fe4437643a60fe03 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 May 2015 14:07:30 -0700 Subject: net: phy: bcm7xxx: Fix 7425 PHY ID and flags While adding support for 7425 PHY in the 7xxx PHY driver, the ID that was used was actually coming from an external PHY: a BCM5461x. Fix this by using the proper ID for the internal 7425 PHY and set the PHY_IS_INTERNAL flag, otherwise consumers of this PHY driver would not be able to properly identify it as such. Fixes: d068b02cfdfc2 ("net: phy: add BCM7425 and BCM7429 PHYs") Signed-off-by: Florian Fainelli Reviewed-by: Petri Gynther Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ae2982c0f7a6..656da2a12ffe 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,7 +17,7 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -#define PHY_ID_BCM7425 0x03625e60 +#define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 -- cgit v1.2.3 From 496e7ce2a46562938edcb74f65b26068ee8895f6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 May 2015 01:08:08 -0700 Subject: gpiolib: Add missing dummies for the unified device properties interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If GPIOLIB=n: drivers/leds/leds-gpio.c: In function ‘gpio_leds_create’: drivers/leds/leds-gpio.c:187: error: implicit declaration of function ‘devm_get_gpiod_from_child’ drivers/leds/leds-gpio.c:187: warning: assignment makes pointer from integer without a cast Add dummies for fwnode_get_named_gpiod() and devm_get_gpiod_from_child() for the !GPIOLIB case to fix this. Signed-off-by: Geert Uytterhoeven Fixes: 40b7318319281b1b ("gpio: Support for unified device properties interface") Acked-by: Alexandre Courbot Acked-by: Linus Walleij Signed-off-by: Bryan Wu --- include/linux/gpio/consumer.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 3a7c9ffd5ab9..da042657dc31 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -406,6 +406,21 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) return -EINVAL; } +/* Child properties interface */ +struct fwnode_handle; + +static inline struct gpio_desc *fwnode_get_named_gpiod( + struct fwnode_handle *fwnode, const char *propname) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct gpio_desc *devm_get_gpiod_from_child( + struct device *dev, const char *con_id, struct fwnode_handle *child) +{ + return ERR_PTR(-ENOSYS); +} + #endif /* CONFIG_GPIOLIB */ /* -- cgit v1.2.3 From a57e16cf03339c20b09642f46f60190069ff70c7 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 25 May 2015 23:29:20 +0200 Subject: dmaengine: pxa: add pxa dmaengine driver This is a new driver for pxa SoCs, which is also compatible with the former mmp_pdma. The rationale behind a new driver (as opposed to incremental patching) was : - the new driver relies on virt-dma, which obsoletes all the internal structures of mmp_pdma (sw_desc, hw_desc, ...), and by consequence all the functions - mmp_pdma allocates dma coherent descriptors containing not only hardware descriptors but linked list information The new driver only puts the dma hardware descriptors (ie. 4 u32) into the dma pool allocated memory. This changes completely the way descriptors are handled - the architecture behind the interrupt/tasklet management was rewritten to be more conforming to virt-dma - the buffers alignment is handled differently The former driver assumed that the DMA channel stopped between each descriptor. The new one chains descriptors to let the channel running. This is a necessary guarantee for real-time high bandwidth usecases such as video capture on "old" architectures such as pxa. - hot chaining / cold chaining / no chaining Whenever possible, submitting a descriptor "hot chains" it to a running channel. There is still no guarantee that the descriptor will be issued, as the channel might be stopped just before the descriptor is submitted. Yet this allows to submit several video buffers, and resubmit a buffer while another is under handling. As before, dma_async_issue_pending() is the only guarantee to have all the buffers issued. When an alignment issue is detected (ie. one address in a descriptor is not a multiple of 8), if the already running channel is in "aligned mode", the channel will stop, and restarted in "misaligned mode" to finished the issued list. - descriptors reusing A submitted, issued and completed descriptor can be reused, ie resubmitted if it was prepared with the proper flag (DMA_PREP_ACK). Only a channel resources release will in this case release that buffer. This allows a rolling ring of buffers to be reused, where there are several thousands of hardware descriptors used (video buffer for example). Additionally, a set of more casual features is introduced : - debugging traces - lockless way to know if a descriptor is terminated or not The driver was tested on zylonite board (pxa3xx) and mioa701 (pxa27x), with dmatest, pxa_camera and pxamci. Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- include/linux/dma/pxa-dma.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/dma/pxa-dma.h (limited to 'include/linux') diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h new file mode 100644 index 000000000000..3edc99294bf6 --- /dev/null +++ b/include/linux/dma/pxa-dma.h @@ -0,0 +1,27 @@ +#ifndef _PXA_DMA_H_ +#define _PXA_DMA_H_ + +enum pxad_chan_prio { + PXAD_PRIO_HIGHEST = 0, + PXAD_PRIO_NORMAL, + PXAD_PRIO_LOW, + PXAD_PRIO_LOWEST, +}; + +struct pxad_param { + unsigned int drcmr; + enum pxad_chan_prio prio; +}; + +struct dma_chan; + +#ifdef CONFIG_PXA_DMA +bool pxad_filter_fn(struct dma_chan *chan, void *param); +#else +static inline bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + return false; +} +#endif + +#endif /* _PXA_DMA_H_ */ -- cgit v1.2.3 From 09170a49422bd786be3eac5cec1955257c5a34b7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 13:59:39 +0200 Subject: KVM: const-ify uses of struct kvm_userspace_memory_region Architecture-specific helpers are not supposed to muck with struct kvm_userspace_memory_region contents. Add const to enforce this. In order to eliminate the only write in __kvm_set_memory_region, the cleaning of deleted slots is pulled up from update_memslots to __kvm_set_memory_region. Reviewed-by: Takuya Yoshikawa Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87fd74a04005..fbced7015ebd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -501,9 +501,9 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -511,10 +511,10 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change); bool kvm_largepages_enabled(void); -- cgit v1.2.3 From 15f46015ee17681b542432df21747f5c51857156 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 21:26:08 +0200 Subject: KVM: add memslots argument to kvm_arch_memslots_updated Prepare for the case of multiple address spaces. Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- include/linux/kvm_types.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fbced7015ebd..8815f1dffb77 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -508,7 +508,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); -void kvm_arch_memslots_updated(struct kvm *kvm); +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 931da7e917cf..1b47a185c2f0 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -28,6 +28,7 @@ struct kvm_run; struct kvm_userspace_memory_region; struct kvm_vcpu; struct kvm_vcpu_init; +struct kvm_memslots; enum kvm_mr_change; -- cgit v1.2.3 From cacce073bfd2b4091fbc58e906e7a9a21f538ff6 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 14 May 2015 23:05:49 +0200 Subject: bcma: add module_bcma_driver() This makes it possible to save some lines of code in drivers with an simple bcma driver registration. Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e34f906647d3..2ff4a9961e1d 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -305,6 +305,15 @@ int __bcma_driver_register(struct bcma_driver *drv, struct module *owner); extern void bcma_driver_unregister(struct bcma_driver *drv); +/* module_bcma_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. This eliminates a lot of + * boilerplate. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_bcma_driver(__bcma_driver) \ + module_driver(__bcma_driver, bcma_driver_register, \ + bcma_driver_unregister) + /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ extern int bcma_arch_register_fallback_sprom( -- cgit v1.2.3 From 069d4a7b583274e3fd8712c92a035626e0ebf7be Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Mar 2015 11:58:14 +0100 Subject: netfilter: ebtables: fix comment grammar s/stongly inspired on/strongly inspired by/ Signed-off-by: Geert Uytterhoeven Cc: David S. Miller Signed-off-by: Jiri Kosina --- include/linux/netfilter_bridge/ebtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 34e7a2b7f867..9ac6f263956b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -6,7 +6,7 @@ * * ebtables.c,v 2.0, April, 2002 * - * This code is stongly inspired on the iptables code which is + * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ #ifndef __LINUX_BRIDGE_EFF_H -- cgit v1.2.3 From 7667928601d2981b20011e357904bcb96c365427 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 8 May 2015 00:02:27 +0900 Subject: rapidio: Fix kerneldoc and comment This patch fix spelling typos found in DocBook/rapidio.xml Ths file was generated from comments in the source files, I had to fix them, instead of the xml file. Signed-off-by: Masanari Iida Signed-off-by: Jiri Kosina --- include/linux/rio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rio.h b/include/linux/rio.h index 6bda06f21930..cde976e86b48 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -298,7 +298,7 @@ struct rio_id_table { * struct rio_net - RIO network info * @node: Node in global list of RIO networks * @devices: List of devices in this network - * @switches: List of switches in this netowrk + * @switches: List of switches in this network * @mports: List of master ports accessing this network * @hport: Default port for accessing this network * @id: RIO network ID -- cgit v1.2.3 From 94268fcd9aea736d24cbdade16e7f7c9419c489e Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Tue, 28 Apr 2015 13:11:28 +0200 Subject: lib: crc-itu-t.[ch] fix 0x0x prefix in integer constants Signed-off-by: Antonio Ospite Acked-by: Greg Kroah-Hartman Signed-off-by: Jiri Kosina --- include/linux/crc-itu-t.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crc-itu-t.h b/include/linux/crc-itu-t.h index 84920f3cc83e..a9953c762eee 100644 --- a/include/linux/crc-itu-t.h +++ b/include/linux/crc-itu-t.h @@ -3,7 +3,7 @@ * * Implements the standard CRC ITU-T V.41: * Width 16 - * Poly 0x0x1021 (x^16 + x^12 + x^15 + 1) + * Poly 0x1021 (x^16 + x^12 + x^15 + 1) * Init 0 * * This source code is licensed under the GNU General Public License, -- cgit v1.2.3 From e0213bc5467ca5fe44ab04527f0e47998f30c046 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 18 May 2015 20:04:14 +0900 Subject: usb: renesas_usbhs: Change USBHS_TYPE_R8A779x to USBHS_TYPE_RCAR_GEN2 Since the HSUSB controllers of R-Car Gen2 are the same specification (they have 16 pipes and usb-dmac), this patch changes USBHS_TYPE_R8A7790 and USBHS_TYPE_R8A7791 to USBHS_TYPE_RCAR_GEN2. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/renesas_usbhs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index f06529c14141..3dd5a781da99 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -169,8 +169,7 @@ struct renesas_usbhs_driver_param { #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -#define USBHS_TYPE_R8A7790 1 -#define USBHS_TYPE_R8A7791 2 +#define USBHS_TYPE_RCAR_GEN2 1 /* * option: -- cgit v1.2.3 From a09e23f53e2c14a65a3b14a00060fea163081e1f Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Sat, 16 May 2015 22:33:35 +0200 Subject: usb: gadget: net2280: check interrupts for all endpoints USB3380 in enhanced mode has 4 IN and 4 OUT endpoints. Check interrupts for all of them. Tested-by: Ricardo Ribalda Delgado Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- include/linux/usb/net2280.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h index 148b8fa5b1a2..725120224472 100644 --- a/include/linux/usb/net2280.h +++ b/include/linux/usb/net2280.h @@ -168,6 +168,9 @@ struct net2280_regs { #define ENDPOINT_B_INTERRUPT 2 #define ENDPOINT_A_INTERRUPT 1 #define ENDPOINT_0_INTERRUPT 0 +#define USB3380_IRQSTAT0_EP_INTR_MASK_IN (0xF << 17) +#define USB3380_IRQSTAT0_EP_INTR_MASK_OUT (0xF << 1) + u32 irqstat1; #define POWER_STATE_CHANGE_INTERRUPT 27 #define PCI_ARBITER_TIMEOUT_INTERRUPT 26 -- cgit v1.2.3 From c65c4f052bc3b67989bf54914798513685c54988 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Sat, 16 May 2015 22:33:36 +0200 Subject: usb: gadget: net2280: fix use of GPEP in both directions USB3380 enhanced mode allows GPEP to be used in both IN and OUT directions. However, IN and OUT endpoints must use same USB endpoint address (bEndpointAddress). Fix this by setting the ep_cfg.ep_number during initialization and keep it in net2280_enable() Tested-by: Ricardo Ribalda Delgado Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- include/linux/usb/usb338x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h index f92eb635b9d3..11525d8d89a7 100644 --- a/include/linux/usb/usb338x.h +++ b/include/linux/usb/usb338x.h @@ -43,6 +43,10 @@ #define IN_ENDPOINT_TYPE 12 #define OUT_ENDPOINT_ENABLE 10 #define OUT_ENDPOINT_TYPE 8 +#define USB3380_EP_CFG_MASK_IN ((0x3 << IN_ENDPOINT_TYPE) | \ + BIT(IN_ENDPOINT_ENABLE)) +#define USB3380_EP_CFG_MASK_OUT ((0x3 << OUT_ENDPOINT_TYPE) | \ + BIT(OUT_ENDPOINT_ENABLE)) struct usb338x_usb_ext_regs { u32 usbclass; -- cgit v1.2.3 From e842b84c8e7221c45c8dbd7de09185c6149e1cf9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 23 Mar 2015 09:52:48 +1100 Subject: usb: phy: Add interface to get phy give of device_node. Split the "get phy from device_node" functionality out of "get phy by phandle" so it can be used directly. This is useful when a battery-charger is intimately associated with a particular phy but handled by a separate driver. The charger can find the device_node based on sibling relationships without the need for a redundant declaration in the devicetree description. As a peripheral that gets a phy will often want to register a notifier block, and de-register it later, that functionality is included so the de-registration is automatic. Acked-by: Pavel Machek Signed-off-by: NeilBrown Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index bc91b5d380fd..8ed1e29ef329 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -205,6 +205,8 @@ extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index); extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index); extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, const char *phandle, u8 index); +extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, + struct device_node *node, struct notifier_block *nb); extern void usb_put_phy(struct usb_phy *); extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern int usb_bind_phy(const char *dev_name, u8 index, -- cgit v1.2.3 From 7d7efec368d537226142cbe559f45797f18672f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 16:35:16 -0400 Subject: sched, cgroup: reorganize threadgroup locking threadgroup_change_begin/end() are used to mark the beginning and end of threadgroup modifying operations to allow code paths which require a threadgroup to stay stable across blocking operations to synchronize against those sections using threadgroup_lock/unlock(). It's currently implemented as a general mechanism in sched.h using per-signal_struct rwsem; however, this never grew non-cgroup use cases and becomes noop if !CONFIG_CGROUPS. It turns out that cgroups is gonna be better served with a different sycnrhonization scheme and is a bit silly to keep cgroups specific details as a general mechanism. What's general here is identifying the places where threadgroups are modified. This patch restructures threadgroup locking so that threadgroup_change_begin/end() become a place where subsystems which need to sycnhronize against threadgroup changes can hook into. cgroup_threadgroup_change_begin/end() which operate on the per-signal_struct rwsem are created and threadgroup_lock/unlock() are moved to cgroup.c and made static. This is pure reorganization which doesn't cause any functional changes. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 10 +++++++++ include/linux/sched.h | 53 +++++++++++++++------------------------------ 2 files changed, 27 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 55f3120fb952..1b8c93806dbd 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #ifdef CONFIG_CGROUPS @@ -460,5 +461,14 @@ struct cgroup_subsys { unsigned int depends_on; }; +void cgroup_threadgroup_change_begin(struct task_struct *tsk); +void cgroup_threadgroup_change_end(struct task_struct *tsk); + +#else /* CONFIG_CGROUPS */ + +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} + #endif /* CONFIG_CGROUPS */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..5ee290003470 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -58,6 +58,7 @@ struct sched_param { #include #include #include +#include #include @@ -2648,53 +2649,33 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -#ifdef CONFIG_CGROUPS -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - down_read(&tsk->signal->group_rwsem); -} -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - up_read(&tsk->signal->group_rwsem); -} - /** - * threadgroup_lock - lock threadgroup - * @tsk: member task of the threadgroup to lock - * - * Lock the threadgroup @tsk belongs to. No new task is allowed to enter - * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or - * change ->group_leader/pid. This is useful for cases where the threadgroup - * needs to stay stable across blockable operations. + * threadgroup_change_begin - mark the beginning of changes to a threadgroup + * @tsk: task causing the changes * - * fork and exit paths explicitly call threadgroup_change_{begin|end}() for - * synchronization. While held, no new task will be added to threadgroup - * and no existing live task will have its PF_EXITING set. - * - * de_thread() does threadgroup_change_{begin|end}() when a non-leader - * sub-thread becomes a new leader. + * All operations which modify a threadgroup - a new thread joining the + * group, death of a member thread (the assertion of PF_EXITING) and + * exec(2) dethreading the process and replacing the leader - are wrapped + * by threadgroup_change_{begin|end}(). This is to provide a place which + * subsystems needing threadgroup stability can hook into for + * synchronization. */ -static inline void threadgroup_lock(struct task_struct *tsk) +static inline void threadgroup_change_begin(struct task_struct *tsk) { - down_write(&tsk->signal->group_rwsem); + might_sleep(); + cgroup_threadgroup_change_begin(tsk); } /** - * threadgroup_unlock - unlock threadgroup - * @tsk: member task of the threadgroup to unlock + * threadgroup_change_end - mark the end of changes to a threadgroup + * @tsk: task causing the changes * - * Reverse threadgroup_lock(). + * See threadgroup_change_begin(). */ -static inline void threadgroup_unlock(struct task_struct *tsk) +static inline void threadgroup_change_end(struct task_struct *tsk) { - up_write(&tsk->signal->group_rwsem); + cgroup_threadgroup_change_end(tsk); } -#else -static inline void threadgroup_change_begin(struct task_struct *tsk) {} -static inline void threadgroup_change_end(struct task_struct *tsk) {} -static inline void threadgroup_lock(struct task_struct *tsk) {} -static inline void threadgroup_unlock(struct task_struct *tsk) {} -#endif #ifndef __HAVE_THREAD_FUNCTIONS -- cgit v1.2.3 From d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 May 2015 16:35:17 -0400 Subject: sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem The cgroup side of threadgroup locking uses signal_struct->group_rwsem to synchronize against threadgroup changes. This per-process rwsem adds small overhead to thread creation, exit and exec paths, forces cgroup code paths to do lock-verify-unlock-retry dance in a couple places and makes it impossible to atomically perform operations across multiple processes. This patch replaces signal_struct->group_rwsem with a global percpu_rwsem cgroup_threadgroup_rwsem which is cheaper on the reader side and contained in cgroups proper. This patch converts one-to-one. This does make writer side heavier and lower the granularity; however, cgroup process migration is a fairly cold path, we do want to optimize thread operations over it and cgroup migration operations don't take enough time for the lower granularity to matter. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 27 +++++++++++++++++++++++++-- include/linux/init_task.h | 8 -------- include/linux/sched.h | 12 ------------ 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1b8c93806dbd..7d83d7f73420 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -461,8 +461,31 @@ struct cgroup_subsys { unsigned int depends_on; }; -void cgroup_threadgroup_change_begin(struct task_struct *tsk); -void cgroup_threadgroup_change_end(struct task_struct *tsk); +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. + */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadcgroup_change_begin(). + */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 696d22312b31..0cc0bbf20022 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -56,7 +49,6 @@ extern struct fs_struct init_fs; }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ee290003470..add524a910bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -743,18 +743,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ -- cgit v1.2.3 From 194ec9368c0dbc421acdb2620d4dfb3cc3d022ff Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 14 May 2015 15:28:24 +0100 Subject: drivers: of/base: move of_init to driver_init Commit 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") adds the symlink `of_node` for each device pointing to it's device tree node while creating/initialising it. However the devicetree sysfs is created and setup in of_init which is executed at core_initcall level. For all the devices created before of_init, the following error is thrown: "Error -2(-ENOENT) creating of_node link" Like many other components in driver model, initialize the sysfs support for OF/devicetree from driver_init so that it's ready before any devices are created. Fixes: 5590f3196b29 ("drivers/core/of: Add symlink to device-tree from devices with an OF node") Suggested-by: Rob Herring Cc: Grant Likely Cc: Pawel Moll Cc: Benjamin Herrenschmidt Signed-off-by: Sudeep Holla Tested-by: Robert Schwebel Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..b871ff9d81d7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -121,6 +121,8 @@ extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; #ifdef CONFIG_OF +void of_core_init(void); + static inline bool is_of_node(struct fwnode_handle *fwnode) { return fwnode && fwnode->type == FWNODE_OF; @@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index); #else /* CONFIG_OF */ +static inline void of_core_init(void) +{ +} + static inline bool is_of_node(struct fwnode_handle *fwnode) { return false; -- cgit v1.2.3 From e463d88c36d42211aa72ed76d32fb8bf37820ef1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 May 2015 12:19:58 -0700 Subject: net: phy: Add phy_interface_is_rgmii helper RGMII interfaces come in 4 different flavors that the PHY library needs to care about: regular RGMII (no delays), RGMII with either RX or TX delay, and both. In order to avoid errors of checking only for one type of RGMII interface and miss the 3 others, introduce a convenience function which tests for all values. Suggested-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 701c7a3946e0..a26c3f84b8dd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -677,6 +677,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_is_rgmii - Convenience function for testing if a PHY interface + * is RGMII (all variants) + * @phydev: the phy_device struct + */ +static inline bool phy_interface_is_rgmii(struct phy_device *phydev) +{ + return phydev->interface >= PHY_INTERFACE_MODE_RGMII && + phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID; +} + /** * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. -- cgit v1.2.3 From b371b594317869971af326adcf7cd65cabdb4087 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 May 2015 10:57:13 +0200 Subject: perf/x86: Fix event/group validation Commit 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()") violated the rule that 'fake' scheduling; as used for event/group validation; should not change the event state. This went mostly un-noticed because repeated calls of x86_pmu::get_event_constraints() would give the same result. And x86_pmu::put_event_constraints() would mostly not do anything. Commit e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround") made the situation much worse by actually setting the event->hw.constraint value to NULL, so when validation and actual scheduling interact we get NULL ptr derefs. Fix it by removing the constraint pointer from the event and move it back to an array, this time in cpuc instead of on the stack. validate_group() x86_schedule_events() event->hw.constraint = c; # store perf_task_event_sched_in() ... x86_schedule_events(); event->hw.constraint = c2; # store ... put_event_constraints(event); # assume failure to schedule intel_put_event_constraints() event->hw.constraint = NULL; c = event->hw.constraint; # read -> NULL if (!test_bit(hwc->idx, c->idxmsk)) # <- *BOOM* NULL deref This in particular is possible when the event in question is a cpu-wide event and group-leader, where the validate_group() tries to add an event to the group. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Hunter Cc: Linus Torvalds Cc: Maria Dimakopoulou Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 43b4578071c0 ("perf/x86: Reduce stack usage of x86_schedule_events()") Fixes: e979121b1b15 ("perf/x86/intel: Implement cross-HT corruption bug workaround") Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..d8a82a89f35a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -92,8 +92,6 @@ struct hw_perf_event_extra { int idx; /* index in shared_regs->regs[] */ }; -struct event_constraint; - /** * struct hw_perf_event - performance event hardware details: */ @@ -112,8 +110,6 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; - - struct event_constraint *constraint; }; struct { /* software */ struct hrtimer hrtimer; -- cgit v1.2.3 From b3df4ec4424f27e55d754cfe586195fecca1c4e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 May 2015 00:00:51 +0000 Subject: perf/x86/intel/cqm: Use proper data types 'int' is really not a proper data type for an MSR. Use u32 to make it clear that we are dealing with a 32-bit unsigned hardware value. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Will Auld Link: http://lkml.kernel.org/r/20150518235149.919350144@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 248f7829ce41..06580028cee6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -120,7 +120,7 @@ struct hw_perf_event { }; struct { /* intel_cqm */ int cqm_state; - int cqm_rmid; + u32 cqm_rmid; struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; -- cgit v1.2.3 From 16b369a91d0dd80be214b7f7801fbc51875454cc Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Mon, 25 May 2015 15:08:47 +0200 Subject: random: Blocking API for accessing nonblocking_pool The added API calls provide a synchronous function call get_blocking_random_bytes where the caller is blocked until the nonblocking_pool is initialized. CC: Andreas Steffen CC: Theodore Ts'o CC: Sandy Harris Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index b05856e16b75..796267d56901 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -14,6 +14,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); +extern void get_blocking_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); extern int random_int_secret_init(void); -- cgit v1.2.3 From 7d010fdf299929f9583ce5e17da629dcd83c36ef Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:13 +0200 Subject: x86/mm/mtrr: Avoid #ifdeffery with phys_wc_to_mtrr_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is only one user but since we're going to bury MTRR next out of access to drivers, expose this last piece of API to drivers in a general fashion only needing io.h for access to helpers. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Abhilash Kesavan Cc: Andrew Morton Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Borislav Petkov Cc: Brian Gerst Cc: Catalin Marinas Cc: Cristian Stoica Cc: Daniel Vetter Cc: Dave Airlie Cc: Dave Hansen Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Matthias Brugger Cc: Mel Gorman Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1429722736-4473-1-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/io.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 986f2bffea1e..04cce4da3685 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -111,6 +111,13 @@ static inline void arch_phys_wc_del(int handle) } #define arch_phys_wc_add arch_phys_wc_add +#ifndef arch_phys_wc_index +static inline int arch_phys_wc_index(int handle) +{ + return -1; +} +#define arch_phys_wc_index arch_phys_wc_index +#endif #endif #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From 06931e62246844c73fba24d7aeb4a5dc897a2739 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 May 2015 15:11:28 +0200 Subject: sched/topology: Rename topology_thread_cpumask() to topology_sibling_cpumask() Rename topology_thread_cpumask() to topology_sibling_cpumask() for more consistency with scheduler code. Signed-off-by: Bartosz Golaszewski Reviewed-by: Thomas Gleixner Acked-by: Russell King Acked-by: Catalin Marinas Cc: Benoit Cousson Cc: Fenghua Yu Cc: Guenter Roeck Cc: Jean Delvare Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Oleg Drokin Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Russell King Cc: Viresh Kumar Link: http://lkml.kernel.org/r/1432645896-12588-2-git-send-email-bgolaszewski@baylibre.com Signed-off-by: Ingo Molnar --- include/linux/topology.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 909b6e43b694..73ddad1e0fa3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_cpumask -#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#ifndef topology_sibling_cpumask +#define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) @@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu) #ifdef CONFIG_SCHED_SMT static inline const struct cpumask *cpu_smt_mask(int cpu) { - return topology_thread_cpumask(cpu); + return topology_sibling_cpumask(cpu); } #endif -- cgit v1.2.3 From 66eb579e66ecfea55e2007be0594869ea9e453d4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 May 2015 17:12:23 +0100 Subject: perf: allow for PMU-specific event filtering In certain circumstances it may not be possible to schedule particular events due to constraints other than a lack of hardware counters (e.g. on big.LITTLE systems where CPUs support different events). The core perf event code does not distinguish these cases and pessimistically assumes that any failure to schedule an event means that it is not worth attempting to schedule later events, even if some hardware counters are still unused. When an event a pmu cannot schedule exists in a flexible group list it can unnecessarily prevent event groups following it in the list from being scheduled (until it is rotated to the end of the list). This means some events are scheduled for only a portion of the time they could be, and for short running programs no events may be scheduled if the list is initially sorted in an unfortunate order. This patch adds a new (optional) filter_match function pointer to struct pmu which a pmu driver can use to tell perf core when an event matches pmu-specific scheduling requirements. This plugs into the existing event_filter_match logic, and makes it possible to avoid the scheduling problem described above. When no filter is provided by the PMU, the existing behaviour is retained. Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Acked-by: Will Deacon Acked-by: Peter Zijlstra Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61992cf2e977..67c719cc91aa 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -304,6 +304,11 @@ struct pmu { * Free pmu-private AUX data structures */ void (*free_aux) (void *aux); /* optional */ + + /* + * Filter events for PMU-specific reasons. + */ + int (*filter_match) (struct perf_event *event); /* optional */ }; /** -- cgit v1.2.3 From 24fe86a617c550fb9bdc6c8bd7cf647d3955f8ba Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 29 Mar 2015 12:50:46 +0200 Subject: phy: sun4i-usb: Add a sunxi specific function for setting squelch-detect The sunxi otg phy has a bug where it wrongly detects a high speed squelch when reset on the root port gets de-asserted with a lo-speed device. The workaround for this is to disable squelch detect before de-asserting reset, and re-enabling it after the reset de-assert is done. Add a sunxi specific phy function to allow the sunxi-musb glue to do this. Acked-by: Kishon Vijay Abraham I Signed-off-by: Hans de Goede Signed-off-by: Felipe Balbi --- include/linux/phy/phy-sun4i-usb.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/phy/phy-sun4i-usb.h (limited to 'include/linux') diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h new file mode 100644 index 000000000000..50aed92ea89c --- /dev/null +++ b/include/linux/phy/phy-sun4i-usb.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2015 Hans de Goede + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef PHY_SUN4I_USB_H_ +#define PHY_SUN4I_USB_H_ + +#include "phy.h" + +/** + * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect + * @phy: reference to a sun4i usb phy + * @enabled: wether to enable or disable squelch detect + */ +void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled); + +#endif -- cgit v1.2.3 From e5c4708b2b6fec0300db60ee3cf4b4ae96430a12 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 26 May 2015 19:20:14 -0700 Subject: pci: Add Cavium PCI vendor id This vendor id will be used by network (vNIC), USB (xHCI), SATA (AHCI), GPIO, I2C, MMC and maybe other drivers for ThunderX SoC. Acked-by: Bjorn Helgaas Signed-off-by: Sunil Goutham Signed-off-by: Aleksey Makarov Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2f7b9a40f627..2972c7f3aa1d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2329,6 +2329,8 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_CAVIUM 0x177d + #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f -- cgit v1.2.3 From 4612c715a6ea6b3af2aee0163c0721375b2548d7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 18 May 2015 12:58:40 +0200 Subject: mtd: cfi: deinline large functions With this .config: http://busybox.net/~vda/kernel_config, after uninlining these functions have sizes and callsite counts as follows: cfi_udelay(): 74 bytes, 26 callsites cfi_send_gen_cmd(): 153 bytes, 95 callsites cfi_build_cmd(): 274 bytes, 123 callsites cfi_build_cmd_addr(): 49 bytes, 15 callsites cfi_merge_status(): 230 bytes, 3 callsites Reduction in code size is about 50,000: text data bss dec hex filename 85842882 22294584 20627456 128764922 7accbfa vmlinux.before 85789648 22294616 20627456 128711720 7abfc28 vmlinux Signed-off-by: Denys Vlasenko CC: Dan Carpenter CC: Jingoo Han CC: Brian Norris CC: Aaron Sierra CC: Artem Bityutskiy CC: David Woodhouse CC: linux-mtd@lists.infradead.org CC: linux-kernel@vger.kernel.org Signed-off-by: Brian Norris --- include/linux/mtd/cfi.h | 188 +++--------------------------------------------- 1 file changed, 8 insertions(+), 180 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 299d7d31fe53..9b57a9b1b081 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -296,183 +296,19 @@ struct cfi_private { struct flchip chips[0]; /* per-chip data structure for each chip */ }; -/* - * Returns the command address according to the given geometry. - */ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, - struct map_info *map, struct cfi_private *cfi) -{ - unsigned bankwidth = map_bankwidth(map); - unsigned interleave = cfi_interleave(cfi); - unsigned type = cfi->device_type; - uint32_t addr; - - addr = (cmd_ofs * type) * interleave; - - /* Modify the unlock address if we are in compatibility mode. - * For 16bit devices on 8 bit busses - * and 32bit devices on 16 bit busses - * set the low bit of the alternating bit sequence of the address. - */ - if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa)) - addr |= (type >> 1)*interleave; - - return addr; -} - -/* - * Transforms the CFI command for the given geometry (bus width & interleave). - * It looks too long to be inline, but in the common case it should almost all - * get optimised away. - */ -static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi) -{ - map_word val = { {0} }; - int wordwidth, words_per_bus, chip_mode, chips_per_word; - unsigned long onecmd; - int i; - - /* We do it this way to give the compiler a fighting chance - of optimising away all the crap for 'bankwidth' larger than - an unsigned long, in the common case where that support is - disabled */ - if (map_bankwidth_is_large(map)) { - wordwidth = sizeof(unsigned long); - words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 - } else { - wordwidth = map_bankwidth(map); - words_per_bus = 1; - } - - chip_mode = map_bankwidth(map) / cfi_interleave(cfi); - chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); - - /* First, determine what the bit-pattern should be for a single - device, according to chip mode and endianness... */ - switch (chip_mode) { - default: BUG(); - case 1: - onecmd = cmd; - break; - case 2: - onecmd = cpu_to_cfi16(map, cmd); - break; - case 4: - onecmd = cpu_to_cfi32(map, cmd); - break; - } - - /* Now replicate it across the size of an unsigned long, or - just to the bus width as appropriate */ - switch (chips_per_word) { - default: BUG(); -#if BITS_PER_LONG >= 64 - case 8: - onecmd |= (onecmd << (chip_mode * 32)); -#endif - case 4: - onecmd |= (onecmd << (chip_mode * 16)); - case 2: - onecmd |= (onecmd << (chip_mode * 8)); - case 1: - ; - } +uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi); - /* And finally, for the multi-word case, replicate it - in all words in the structure */ - for (i=0; i < words_per_bus; i++) { - val.x[i] = onecmd; - } - - return val; -} +map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cfi_private *cfi); #define CMD(x) cfi_build_cmd((x), map, cfi) - -static inline unsigned long cfi_merge_status(map_word val, struct map_info *map, - struct cfi_private *cfi) -{ - int wordwidth, words_per_bus, chip_mode, chips_per_word; - unsigned long onestat, res = 0; - int i; - - /* We do it this way to give the compiler a fighting chance - of optimising away all the crap for 'bankwidth' larger than - an unsigned long, in the common case where that support is - disabled */ - if (map_bankwidth_is_large(map)) { - wordwidth = sizeof(unsigned long); - words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 - } else { - wordwidth = map_bankwidth(map); - words_per_bus = 1; - } - - chip_mode = map_bankwidth(map) / cfi_interleave(cfi); - chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); - - onestat = val.x[0]; - /* Or all status words together */ - for (i=1; i < words_per_bus; i++) { - onestat |= val.x[i]; - } - - res = onestat; - switch(chips_per_word) { - default: BUG(); -#if BITS_PER_LONG >= 64 - case 8: - res |= (onestat >> (chip_mode * 32)); -#endif - case 4: - res |= (onestat >> (chip_mode * 16)); - case 2: - res |= (onestat >> (chip_mode * 8)); - case 1: - ; - } - - /* Last, determine what the bit-pattern should be for a single - device, according to chip mode and endianness... */ - switch (chip_mode) { - case 1: - break; - case 2: - res = cfi16_to_cpu(map, res); - break; - case 4: - res = cfi32_to_cpu(map, res); - break; - default: BUG(); - } - return res; -} - +unsigned long cfi_merge_status(map_word val, struct map_info *map, + struct cfi_private *cfi); #define MERGESTATUS(x) cfi_merge_status((x), map, cfi) - -/* - * Sends a CFI command to a bank of flash for the given geometry. - * - * Returns the offset in flash where the command was written. - * If prev_val is non-null, it will be set to the value at the command address, - * before the command was written. - */ -static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, +uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, struct map_info *map, struct cfi_private *cfi, - int type, map_word *prev_val) -{ - map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); - val = cfi_build_cmd(cmd, map, cfi); - - if (prev_val) - *prev_val = map_read(map, addr); - - map_write(map, val, addr); - - return addr - base; -} + int type, map_word *prev_val); static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) { @@ -506,15 +342,7 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr) } } -static inline void cfi_udelay(int us) -{ - if (us >= 1000) { - msleep((us+999)/1000); - } else { - udelay(us); - cond_resched(); - } -} +void cfi_udelay(int us); int __xipram cfi_qry_present(struct map_info *map, __u32 base, struct cfi_private *cfi); -- cgit v1.2.3 From 7d0ae8086b828311250c6afdf800b568ac9bd693 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2015 14:57:58 -0800 Subject: rcu: Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE() This commit moves from the old ACCESS_ONCE() API to the new READ_ONCE() and WRITE_ONCE() APIs. Signed-off-by: Paul E. McKenney [ paulmck: Updated to include kernel/torture.c as suggested by Jason Low. ] --- include/linux/rculist.h | 6 +++--- include/linux/rcupdate.h | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index a18b16f1dc0e..665397247e82 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -29,8 +29,8 @@ */ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) { - ACCESS_ONCE(list->next) = list; - ACCESS_ONCE(list->prev) = list; + WRITE_ONCE(list->next, list); + WRITE_ONCE(list->prev, list); } /* @@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list, #define list_first_or_null_rcu(ptr, type, member) \ ({ \ struct list_head *__ptr = (ptr); \ - struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5afd5ed8..87bb0eee665b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -364,8 +364,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ rcu_all_qs(); \ - if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ - ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + if (READ_ONCE((t)->rcu_tasks_holdout)) \ + WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) @@ -609,7 +609,7 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_access_pointer(p, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) @@ -630,7 +630,7 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_access_index(p, space) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ rcu_dereference_sparse(p, space); \ (_________p1); \ }) @@ -659,7 +659,7 @@ static inline void rcu_preempt_sleep_check(void) */ #define lockless_dereference(p) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) @@ -702,7 +702,7 @@ static inline void rcu_preempt_sleep_check(void) * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful * when the value of this pointer is accessed, but the pointer is not * dereferenced, for example, when testing an RCU-protected pointer against * NULL. Although rcu_access_pointer() may also be used in cases where @@ -791,7 +791,7 @@ static inline void rcu_preempt_sleep_check(void) * @p: The index to read * * Return the value of the specified RCU-protected index, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful * when the value of this index is accessed, but the index is not * dereferenced, for example, when testing an RCU-protected index against * -1. Although rcu_access_index() may also be used in cases where @@ -827,7 +827,7 @@ static inline void rcu_preempt_sleep_check(void) * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the * pointer from changing. Please note that this primitive does -not- * prevent the compiler from repeating this reference or combining it -- cgit v1.2.3 From 1ebee8017d84ec8a0ba893cf7b8be3f70ead088b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 19 Apr 2015 18:21:47 -0700 Subject: rcu: Eliminate array-index-based RCU primitives Now that rcu_access_index() and rcu_dereference_index_check() are no longer used, the commit removes them from the RCU API. This means that RCU's data dependencies now involve only pointers, give or take the occasional cast to and then back from an integer type to do pointer arithmetic. This in turn eliminates the need for a number of operations on values carrying RCU data dependencies. Signed-off-by: Paul E. McKenney Cc: linux-edac@vger.kernel.org Cc: Tony Luck Acked-by: Borislav Petkov --- include/linux/rcupdate.h | 50 ------------------------------------------------ 1 file changed, 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..b97842ff71d2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -628,21 +628,6 @@ static inline void rcu_preempt_sleep_check(void) ((typeof(*p) __force __kernel *)(p)); \ }) -#define __rcu_access_index(p, space) \ -({ \ - typeof(p) _________p1 = READ_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - (_________p1); \ -}) -#define __rcu_dereference_index_check(p, c) \ -({ \ - /* Dependency order vs. p above. */ \ - typeof(p) _________p1 = lockless_dereference(p); \ - rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check() usage"); \ - (_________p1); \ -}) - /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. @@ -786,41 +771,6 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) -/** - * rcu_access_index() - fetch RCU index with no dereferencing - * @p: The index to read - * - * Return the value of the specified RCU-protected index, but omit the - * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful - * when the value of this index is accessed, but the index is not - * dereferenced, for example, when testing an RCU-protected index against - * -1. Although rcu_access_index() may also be used in cases where - * update-side locks prevent the value of the index from changing, you - * should instead use rcu_dereference_index_protected() for this use case. - */ -#define rcu_access_index(p) __rcu_access_index((p), __rcu) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing -- cgit v1.2.3 From d956028e99b30726b0bce0ca684b40b1ad67b514 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Mar 2015 09:39:41 +0100 Subject: documentation: memory-barriers: Fix smp_mb__before_spinlock() semantics Our current documentation claims that, when followed by an ACQUIRE, smp_mb__before_spinlock() orders prior loads against subsequent loads and stores, which isn't the intent. This commit therefore fixes the documentation to state that this sequence orders only prior stores against subsequent loads and stores. In addition, the original intent of smp_mb__before_spinlock() was to only order prior loads against subsequent stores, however, people have started using it as if it ordered prior loads against subsequent loads and stores. This commit therefore also updates smp_mb__before_spinlock()'s header comment to reflect this new reality. Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Paul E. McKenney --- include/linux/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3e18379dfa6f..0063b24b4f36 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,7 +120,7 @@ do { \ /* * Despite its name it doesn't necessarily has to be a full barrier. * It should only guarantee that a STORE before the critical section - * can not be reordered with a LOAD inside this section. + * can not be reordered with LOADs and STOREs inside this section. * spin_lock() is the one-way barrier, this LOAD can not escape out * of the region. So the default implementation simply ensures that * a STORE can not move into the critical section, smp_wmb() should -- cgit v1.2.3 From 3382adbc1bb8c80ea512243acf6059564287620b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2015 15:41:24 -0800 Subject: rcu: Eliminate a few CONFIG_RCU_NOCB_CPU_ALL #ifdefs This commit converts several CONFIG_RCU_NOCB_CPU_ALL #ifdefs to instead use IS_ENABLED(). This change should help avoid hiding code from compiler diagnostics. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutree.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..5ec20bc4af76 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1153,13 +1153,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) +#ifdef CONFIG_TINY_RCU static inline int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; return 0; } -#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ +#endif /* #ifdef CONFIG_TINY_RCU */ #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..0bd400b02430 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,9 +31,7 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(void); -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); /* -- cgit v1.2.3 From 5af4692a75daf08dddc93dbb4cd2a1b3d3b617af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Apr 2015 12:48:29 -0700 Subject: smp: Make control dependencies work on Alpha, improve documentation The current formulation of control dependencies fails on DEC Alpha, which does not respect dependencies of any kind unless an explicit memory barrier is provided. This means that the current fomulation of control dependencies fails on Alpha. This commit therefore creates a READ_ONCE_CTRL() that has the same overhead on non-Alpha systems, but causes Alpha to produce the needed ordering. This commit also applies READ_ONCE_CTRL() to the one known use of control dependencies. Use of READ_ONCE_CTRL() also has the beneficial effect of adding a bit of self-documentation to control dependencies. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) --- include/linux/compiler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..5d66777914db 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -252,6 +252,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define WRITE_ONCE(x, val) \ ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) +/** + * READ_ONCE_CTRL - Read a value heading a control dependency + * @x: The value to be read, heading the control dependency + * + * Control dependencies are tricky. See Documentation/memory-barriers.txt + * for important information on how to use them. Note that in many cases, + * use of smp_load_acquire() will be much simpler. Control dependencies + * should be avoided except on the hottest of hotpaths. + */ +#define READ_ONCE_CTRL(x) \ +({ \ + typeof(x) __val = READ_ONCE(x); \ + smp_read_barrier_depends(); /* Enforce control dependency. */ \ + __val; \ +}) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From f517700cce37ffcb36e7afae0294fd11c72ed134 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 13:27:08 +0800 Subject: rculist: Fix another sparse warning This fixes the following sparse warnings: make C=1 CF=-D__CHECK_ENDIAN__ net/tipc/name_table.o net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces) net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces) To silence these spare complaints, an RCU annotation should be added to "next" pointer of hlist_node structure through hlist_next_rcu() macro when iterating over a hlist with hlist_for_each_entry_from_rcu(). Signed-off-by: Ying Xue Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 665397247e82..17c6b1f84a77 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, */ #define hlist_for_each_entry_from_rcu(pos, member) \ for (; pos; \ - pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 51952bc633064311410b041fad38da1614f4539e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Apr 2015 11:15:30 -0700 Subject: rcu: Further shrink Tiny RCU by making empty functions static inlines The Tiny RCU counterparts to rcu_idle_enter(), rcu_idle_exit(), rcu_irq_enter(), and rcu_irq_exit() are empty functions, but each has EXPORT_SYMBOL_GPL(), which needlessly consumes extra memory, especially in kernels built with module support. This commit therefore moves these functions to static inlines in rcutiny.h, removing the need for exports. This won't affect the size of the tiniest kernels, which are likely built without module support, but might help semi-tiny kernels that might include module support. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 4 ---- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 5 +++++ 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..1b3d7bcb3a6c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -292,10 +292,6 @@ void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); struct notifier_block; -void rcu_idle_enter(void); -void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 937edaeb150d..3df6c1ec4e25 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) +{ +} + +static inline void rcu_idle_exit(void) +{ +} + +static inline void rcu_irq_enter(void) +{ +} + +static inline void rcu_irq_exit(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..f22d83f49e56 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -93,6 +93,11 @@ void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); +void rcu_idle_enter(void); +void rcu_idle_exit(void); +void rcu_irq_enter(void); +void rcu_irq_exit(void); + void exit_rcu(void); void rcu_scheduler_starting(void); -- cgit v1.2.3 From f36963c9d3f6f415732710da3acdd8608a9fa0e5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 9 May 2015 03:14:13 +0930 Subject: cpumask_set_cpu_local_first => cpumask_local_spread, lament da91309e0a7e (cpumask: Utility function to set n'th cpu...) created a genuinely weird function. I never saw it before, it went through DaveM. (He only does this to make us other maintainers feel better about our own mistakes.) cpumask_set_cpu_local_first's purpose is say "I need to spread things across N online cpus, choose the ones on this numa node first"; you call it in a loop. It can fail. One of the two callers ignores this, the other aborts and fails the device open. It can fail in two ways: allocating the off-stack cpumask, or through a convoluted codepath which AFAICT can only occur if cpu_online_mask changes. Which shouldn't happen, because if cpu_online_mask can change while you call this, it could return a now-offline cpu anyway. It contains a nonsensical test "!cpumask_of_node(numa_node)". This was drawn to my attention by Geert, who said this causes a warning on Sparc. It sets a single bit in a cpumask instead of returning a cpu number, because that's what the callers want. It could be made more efficient by passing the previous cpu rather than an index, but that would be more invasive to the callers. Fixes: da91309e0a7e8966d916a74cce42ed170fde06bf Signed-off-by: Rusty Russell (then rebased) Tested-by: Amir Vadai Acked-by: Amir Vadai Acked-by: David S. Miller --- include/linux/cpumask.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 27e285b92b5f..59915ea5373c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, return 1; } -static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +static inline unsigned int cpumask_local_spread(unsigned int i, int node) { - set_bit(0, cpumask_bits(dstp)); - return 0; } @@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); -int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp); +unsigned int cpumask_local_spread(unsigned int i, int node); /** * for_each_cpu - iterate over every cpu in a mask -- cgit v1.2.3 From ad5fb870c486d932a1749d7853dd70f436a7e03f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 3 Apr 2015 12:05:28 -0400 Subject: e820, efi: add ACPI 6.0 persistent memory types ACPI 6.0 formalizes e820-type-7 and efi-type-14 as persistent memory. Mark it "reserved" and allow it to be claimed by a persistent memory device driver. This definition is in addition to the Linux kernel's existing type-12 definition that was recently added in support of shipping platforms with NVDIMM support that predate ACPI 6.0 (which now classifies type-12 as OEM reserved). Note, /proc/iomem can be consulted for differentiating legacy "Persistent Memory (legacy)" E820_PRAM vs standard "Persistent Memory" E820_PMEM. Cc: Boaz Harrosh Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jens Axboe Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Thomas Gleixner Acked-by: Jeff Moyer Acked-by: Andy Lutomirski Reviewed-by: Ross Zwisler Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/efi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index af5be0368dec..825b6e3d69cb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -85,7 +85,8 @@ typedef struct { #define EFI_MEMORY_MAPPED_IO 11 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE 12 #define EFI_PAL_CODE 13 -#define EFI_MAX_MEMORY_TYPE 14 +#define EFI_PERSISTENT_MEMORY 14 +#define EFI_MAX_MEMORY_TYPE 15 /* Attribute values: */ #define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ -- cgit v1.2.3 From 0be964be0d45084245673c971d72a4b51690231d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:35 +0930 Subject: module: Sanitize RCU usage and locking Currently the RCU usage in module is an inconsistent mess of RCU and RCU-sched, this is broken for CONFIG_PREEMPT where synchronize_rcu() does not imply synchronize_sched(). Most usage sites use preempt_{dis,en}able() which is RCU-sched, but (most of) the modification sites use synchronize_rcu(). With the exception of the module bug list, which actually uses RCU. Convert everything over to RCU-sched. Furthermore add lockdep asserts to all sites, because it's not at all clear to me the required locking is observed, esp. on exported functions. Cc: Rusty Russell Acked-by: "Paul E. McKenney" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/module.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index c883b86ea964..fb56dd85a862 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -421,14 +421,22 @@ struct symsearch { bool unused; }; -/* Search for an exported symbol by name. */ +/* + * Search for an exported symbol by name. + * + * Must be called with module_mutex held or preemption disabled. + */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, bool gplok, bool warn); -/* Walk the exported symbol table */ +/* + * Walk the exported symbol table + * + * Must be called with module_mutex held or preemption disabled. + */ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data); -- cgit v1.2.3 From d72da4a4d973d8a0a0d3c97e7cdebf287fbe3a99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:36 +0930 Subject: rbtree: Make lockless searches non-fatal Change the insert and erase code such that lockless searches are non-fatal. In and of itself an rbtree cannot be correctly searched while in-modification, we can however provide weaker guarantees that will allow the rbtree to be used in conjunction with other techniques, such as latches; see 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()"). For this to work we need the following guarantees from the rbtree code: 1) a lockless reader must not see partial stores, this would allow it to observe nodes that are invalid memory. 2) there must not be (temporary) loops in the tree structure in the modifier's program order, this would cause a lookup which interrupts the modifier to get stuck indefinitely. For 1) we must use WRITE_ONCE() for all updates to the tree structure; in particular this patch only does rb_{left,right} as those are the only element required for simple searches. It generates slightly worse code, probably because volatile. But in pointer chasing heavy code a few instructions more should not matter. For 2) I have carefully audited the code and drawn every intermediate link state and not found a loop. Cc: Mathieu Desnoyers Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Rik van Riel Reviewed-by: Michel Lespinasse Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/rbtree.h | 16 +++++++++++++--- include/linux/rbtree_augmented.h | 21 ++++++++++++++------- 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fb31765e935a..830c4992088d 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -31,6 +31,7 @@ #include #include +#include struct rb_node { unsigned long __rb_parent_color; @@ -73,11 +74,11 @@ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; @@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, *rb_link = node; } +static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + rcu_assign_pointer(*rb_link, node); +} + #define rb_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? rb_entry(____ptr, type, member) : NULL; \ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 378c5ee75f78..14d7b831b63a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, { if (parent) { if (parent->rb_left == old) - parent->rb_left = new; + WRITE_ONCE(parent->rb_left, new); else - parent->rb_right = new; + WRITE_ONCE(parent->rb_right, new); } else - root->rb_node = new; + WRITE_ONCE(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, @@ -137,7 +137,8 @@ static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; @@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, tmp = parent; } else { struct rb_node *successor = child, *child2; + tmp = child->rb_left; if (!tmp) { /* @@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, */ parent = successor; child2 = successor->rb_right; + augment->copy(node, successor); } else { /* @@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, successor = tmp; tmp = tmp->rb_left; } while (tmp); - parent->rb_left = child2 = successor->rb_right; - successor->rb_right = child; + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); + augment->copy(node, successor); augment->propagate(parent, successor); } - successor->rb_left = tmp = node->rb_left; + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); + if (child2) { successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); -- cgit v1.2.3 From 6695b92a60bc7160c92d6dc5b17cc79673017c2f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:36 +0930 Subject: seqlock: Better document raw_write_seqcount_latch() Improve the documentation of the latch technique as used in the current timekeeping code, such that it can be readily employed elsewhere. Borrow from the comments in timekeeping and replace those with a reference to this more generic comment. Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Rik van Riel Cc: "Paul E. McKenney" Cc: Oleg Nesterov Reviewed-by: Mathieu Desnoyers Acked-by: Michel Lespinasse Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/seqlock.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391ce..1c0cf3102fdc 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -233,9 +233,83 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } -/* +/** * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t + * + * The latch technique is a multiversion concurrency control method that allows + * queries during non-atomic modifications. If you can guarantee queries never + * interrupt the modification -- e.g. the concurrency is strictly between CPUs + * -- you most likely do not need this. + * + * Where the traditional RCU/lockless data structures rely on atomic + * modifications to ensure queries observe either the old or the new state the + * latch allows the same for non-atomic updates. The trade-off is doubling the + * cost of storage; we have to maintain two copies of the entire data + * structure. + * + * Very simply put: we first modify one copy and then the other. This ensures + * there is always one copy in a stable state, ready to give us an answer. + * + * The basic form is a data structure like: + * + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; + * + * Where a modification, which is assumed to be externally serialized, does the + * following: + * + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); <- Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[0], ...); + * + * smp_wmb(); <- Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[1], ...); + * } + * + * The query will have a form like: + * + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; + * + * do { + * seq = latch->seq; + * smp_rmb(); + * + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); + * + * smp_rmb(); + * } while (seq != latch->seq); + * + * return entry; + * } + * + * So during the modification, queries are first redirected to data[1]. Then we + * modify data[0]. When that is complete, we redirect queries back to data[0] + * and we can modify data[1]. + * + * NOTE: The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { -- cgit v1.2.3 From 0a04b0166929405cd833c1cc40f99e862b965ddc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:36 +0930 Subject: rcu: Move lockless_dereference() out of rcupdate.h I want to use lockless_dereference() from seqlock.h, which would mean including rcupdate.h from it, however rcupdate.h already includes seqlock.h. Avoid this by moving lockless_dereference() into compiler.h. This is somewhat tricky since it uses smp_read_barrier_depends() which isn't available there, but its a CPP macro so we can get away with it. The alternative would be moving it into asm/barrier.h, but that would be updating each arch (I can do if people feel that is more appropriate). Cc: Paul McKenney Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/compiler.h | 15 +++++++++++++++ include/linux/rcupdate.h | 15 --------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..eae42c21d5fd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -457,6 +457,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (volatile typeof(x) *)&(x); }) #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5afd5ed8..0356ad954ea5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -649,21 +649,6 @@ static inline void rcu_preempt_sleep_check(void) */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) -/** - * lockless_dereference() - safely load a pointer for later dereference - * @p: The pointer to load - * - * Similar to rcu_dereference(), but for situations where the pointed-to - * object's lifetime is managed by something other than RCU. That - * "something other" might be reference counting or simple immortality. - */ -#define lockless_dereference(p) \ -({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - (_________p1); \ -}) - /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to -- cgit v1.2.3 From 7fc26327b75685f37f58d64bdb061460f834f80d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:36 +0930 Subject: seqlock: Introduce raw_read_seqcount_latch() Because with latches there is a strict data dependency on the seq load we can avoid the rmb in favour of a read_barrier_depends. Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/seqlock.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 1c0cf3102fdc..890c7ef709d5 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -35,6 +35,7 @@ #include #include #include +#include #include /* @@ -233,6 +234,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } +static inline int raw_read_seqcount_latch(seqcount_t *s) +{ + return lockless_dereference(s->sequence); +} + /** * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t @@ -284,8 +290,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = latch->seq; - * smp_rmb(); + * seq = lockless_dereference(latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); -- cgit v1.2.3 From ade3f510f93a5613b672febe88eff8ea7f1c63b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: rbtree: Implement generic latch_tree Implement a latched RB-tree in order to get unconditional RCU/lockless lookups. Cc: Oleg Nesterov Cc: Michel Lespinasse Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Rik van Riel Cc: Mathieu Desnoyers Cc: "Paul E. McKenney" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/rbtree_latch.h | 212 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 include/linux/rbtree_latch.h (limited to 'include/linux') diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h new file mode 100644 index 000000000000..4f3432c61d12 --- /dev/null +++ b/include/linux/rbtree_latch.h @@ -0,0 +1,212 @@ +/* + * Latched RB-trees + * + * Copyright (C) 2015 Intel Corp., Peter Zijlstra + * + * Since RB-trees have non-atomic modifications they're not immediately suited + * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for + * lockless lookups; we cannot guarantee they return a correct result. + * + * The simplest solution is a seqlock + RB-tree, this will allow lockless + * lookups; but has the constraint (inherent to the seqlock) that read sides + * cannot nest in write sides. + * + * If we need to allow unconditional lookups (say as required for NMI context + * usage) we need a more complex setup; this data structure provides this by + * employing the latch technique -- see @raw_write_seqcount_latch -- to + * implement a latched RB-tree which does allow for unconditional lookups by + * virtue of always having (at least) one stable copy of the tree. + * + * However, while we have the guarantee that there is at all times one stable + * copy, this does not guarantee an iteration will not observe modifications. + * What might have been a stable copy at the start of the iteration, need not + * remain so for the duration of the iteration. + * + * Therefore, this does require a lockless RB-tree iteration to be non-fatal; + * see the comment in lib/rbtree.c. Note however that we only require the first + * condition -- not seeing partial stores -- because the latch thing isolates + * us from loops. If we were to interrupt a modification the lookup would be + * pointed at the stable tree and complete while the modification was halted. + */ + +#ifndef RB_TREE_LATCH_H +#define RB_TREE_LATCH_H + +#include +#include + +struct latch_tree_node { + struct rb_node node[2]; +}; + +struct latch_tree_root { + seqcount_t seq; + struct rb_root tree[2]; +}; + +/** + * latch_tree_ops - operators to define the tree order + * @less: used for insertion; provides the (partial) order between two elements. + * @comp: used for lookups; provides the order between the search key and an element. + * + * The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * latch_tree_find(). + */ +struct latch_tree_ops { + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b); + int (*comp)(void *key, struct latch_tree_node *b); +}; + +static __always_inline struct latch_tree_node * +__lt_from_rb(struct rb_node *node, int idx) +{ + return container_of(node, struct latch_tree_node, node[idx]); +} + +static __always_inline void +__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx, + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b)) +{ + struct rb_root *root = <r->tree[idx]; + struct rb_node **link = &root->rb_node; + struct rb_node *node = <n->node[idx]; + struct rb_node *parent = NULL; + struct latch_tree_node *ltp; + + while (*link) { + parent = *link; + ltp = __lt_from_rb(parent, idx); + + if (less(ltn, ltp)) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, root); +} + +static __always_inline void +__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx) +{ + rb_erase(<n->node[idx], <r->tree[idx]); +} + +static __always_inline struct latch_tree_node * +__lt_find(void *key, struct latch_tree_root *ltr, int idx, + int (*comp)(void *key, struct latch_tree_node *node)) +{ + struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node); + struct latch_tree_node *ltn; + int c; + + while (node) { + ltn = __lt_from_rb(node, idx); + c = comp(key, ltn); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return ltn; + } + + return NULL; +} + +/** + * latch_tree_insert() - insert @node into the trees @root + * @node: nodes to insert + * @root: trees to insert @node into + * @ops: operators defining the node order + * + * It inserts @node into @root in an ordered fashion such that we can always + * observe one complete tree. See the comment for raw_write_seqcount_latch(). + * + * The inserts use rcu_assign_pointer() to publish the element such that the + * tree structure is stored before we can observe the new @node. + * + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_insert(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 0, ops->less); + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 1, ops->less); +} + +/** + * latch_tree_erase() - removes @node from the trees @root + * @node: nodes to remote + * @root: trees to remove @node from + * @ops: operators defining the node order + * + * Removes @node from the trees @root in an ordered fashion such that we can + * always observe one complete tree. See the comment for + * raw_write_seqcount_latch(). + * + * It is assumed that @node will observe one RCU quiescent state before being + * reused of freed. + * + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_erase(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 0); + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 1); +} + +/** + * latch_tree_find() - find the node matching @key in the trees @root + * @key: search key + * @root: trees to search for @key + * @ops: operators defining the node order + * + * Does a lockless lookup in the trees @root for the node matching @key. + * + * It is assumed that this is called while holding the appropriate RCU read + * side lock. + * + * If the operators define a partial order on the elements (there are multiple + * elements which have the same key value) it is undefined which of these + * elements will be found. Nor is it possible to iterate the tree to find + * further elements with the same key value. + * + * Returns: a pointer to the node matching @key or NULL. + */ +static __always_inline struct latch_tree_node * +latch_tree_find(void *key, struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + struct latch_tree_node *node; + unsigned int seq; + + do { + seq = raw_read_seqcount_latch(&root->seq); + node = __lt_find(key, root, seq & 1, ops->comp); + } while (read_seqcount_retry(&root->seq, seq)); + + return node; +} + +#endif /* RB_TREE_LATCH_H */ -- cgit v1.2.3 From 93c2e105f6bcee231c951ba0e56e84505c4b0483 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: module: Optimize __module_address() using a latched RB-tree Currently __module_address() is using a linear search through all modules in order to find the module corresponding to the provided address. With a lot of modules this can take a lot of time. One of the users of this is kernel_text_address() which is employed in many stack unwinders; which in turn are used by perf-callchain and ftrace (possibly from NMI context). So by optimizing __module_address() we optimize many stack unwinders which are used by both perf and tracing in performance sensitive code. Cc: Rusty Russell Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: "Paul E. McKenney" Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/module.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index fb56dd85a862..ddf35a3368fb 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -210,6 +211,13 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; +struct module; + +struct mod_tree_node { + struct module *mod; + struct latch_tree_node node; +}; + struct module { enum module_state state; @@ -269,8 +277,15 @@ struct module { /* Startup function. */ int (*init)(void); - /* If this is non-NULL, vfree after init() returns */ - void *module_init; + /* + * If this is non-NULL, vfree() after init() returns. + * + * Cacheline align here, such that: + * module_init, module_core, init_size, core_size, + * init_text_size, core_text_size and ltn_core.node[0] + * are on the same cacheline. + */ + void *module_init ____cacheline_aligned; /* Here is the actual code + data, vfree'd on unload. */ void *module_core; @@ -281,6 +296,14 @@ struct module { /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* + * We want mtn_core::{mod,node[0]} to be in the same cacheline as the + * above entries such that a regular lookup will only touch one + * cacheline. + */ + struct mod_tree_node mtn_core; + struct mod_tree_node mtn_init; + /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; @@ -367,7 +390,7 @@ struct module { ctor_fn_t *ctors; unsigned int num_ctors; #endif -}; +} ____cacheline_aligned; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif -- cgit v1.2.3 From 6c9692e2d6a2206d8fd75ea247daa47fb75e4a02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2015 11:09:37 +0930 Subject: module: Make the mod_tree stuff conditional on PERF_EVENTS || TRACING Andrew worried about the overhead on small systems; only use the fancy code when either perf or tracing is enabled. Cc: Rusty Russell Cc: Steven Rostedt Requested-by: Andrew Morton Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- include/linux/module.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index ddf35a3368fb..4c1b02e1361d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -282,7 +282,7 @@ struct module { * * Cacheline align here, such that: * module_init, module_core, init_size, core_size, - * init_text_size, core_text_size and ltn_core.node[0] + * init_text_size, core_text_size and mtn_core::{mod,node[0]} * are on the same cacheline. */ void *module_init ____cacheline_aligned; @@ -296,6 +296,7 @@ struct module { /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; +#ifdef CONFIG_MODULES_TREE_LOOKUP /* * We want mtn_core::{mod,node[0]} to be in the same cacheline as the * above entries such that a regular lookup will only touch one @@ -303,6 +304,7 @@ struct module { */ struct mod_tree_node mtn_core; struct mod_tree_node mtn_init; +#endif /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; -- cgit v1.2.3 From 28b8d0c8f560300836dff352348e513cdf328e50 Mon Sep 17 00:00:00 2001 From: Gobinda Charan Maji Date: Wed, 27 May 2015 11:09:38 +0930 Subject: sysfs: tightened sysfs permission checks There were some inconsistency in restriction to VERIFY_OCTAL_PERMISSIONS(). Previously the test was "User perms >= group perms >= other perms". The permission field of User, Group or Other consists of three bits. LSB is EXECUTE permission, MSB is READ permission and the middle bit is WRITE permission. But logically WRITE is "more privileged" than READ. Say for example, permission value is "0430". Here User has only READ permission whereas Group has both WRITE and EXECUTE permission. So, the checks could be tightened and the tests are separated to USER_READABLE >= GROUP_READABLE >= OTHER_READABLE, USER_WRITABLE >= GROUP_WRITABLE and OTHER_WRITABLE is not permitted. Signed-off-by: Gobinda Charan Maji Signed-off-by: Rusty Russell --- include/linux/kernel.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3a5b48e52a9e..cd54b357c934 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -818,13 +818,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ -#define VERIFY_OCTAL_PERMISSIONS(perms) \ - (BUILD_BUG_ON_ZERO((perms) < 0) + \ - BUILD_BUG_ON_ZERO((perms) > 0777) + \ - /* User perms >= group perms >= other perms */ \ - BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ - BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ - /* Other writable? Generally considered a bad idea. */ \ - BUILD_BUG_ON_ZERO((perms) & 2) + \ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif -- cgit v1.2.3 From 9c27847dda9cfae7c273cde62becf364f9fa9ea3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 27 May 2015 11:09:38 +0930 Subject: kernel/params: constify struct kernel_param_ops uses Most code already uses consts for the struct kernel_param_ops, sweep the kernel for the last offending stragglers. Other than include/linux/moduleparam.h and kernel/params.c all other changes were generated with the following Coccinelle SmPL patch. Merge conflicts between trees can be handled with Coccinelle. In the future git could get Coccinelle merge support to deal with patch --> fail --> grammar --> Coccinelle --> new patch conflicts automatically for us on patches where the grammar is available and the patch is of high confidence. Consider this a feature request. Test compiled on x86_64 against: * allnoconfig * allmodconfig * allyesconfig @ const_found @ identifier ops; @@ const struct kernel_param_ops ops = { }; @ const_not_found depends on !const_found @ identifier ops; @@ -struct kernel_param_ops ops = { +const struct kernel_param_ops ops = { }; Generated-by: Coccinelle SmPL Cc: Rusty Russell Cc: Junio C Hamano Cc: Andrew Morton Cc: Kees Cook Cc: Tejun Heo Cc: Ingo Molnar Cc: cocci@systeme.lip6.fr Cc: linux-kernel@vger.kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1c9effa25e26..5d0f4d97997f 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -225,7 +225,7 @@ struct kparam_array /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ - static struct kernel_param_ops __param_ops_##name = \ + static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ @@ -376,64 +376,64 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -extern struct kernel_param_ops param_ops_byte; +extern const struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); #define param_check_byte(name, p) __param_check(name, p, unsigned char) -extern struct kernel_param_ops param_ops_short; +extern const struct kernel_param_ops param_ops_short; extern int param_set_short(const char *val, const struct kernel_param *kp); extern int param_get_short(char *buffer, const struct kernel_param *kp); #define param_check_short(name, p) __param_check(name, p, short) -extern struct kernel_param_ops param_ops_ushort; +extern const struct kernel_param_ops param_ops_ushort; extern int param_set_ushort(const char *val, const struct kernel_param *kp); extern int param_get_ushort(char *buffer, const struct kernel_param *kp); #define param_check_ushort(name, p) __param_check(name, p, unsigned short) -extern struct kernel_param_ops param_ops_int; +extern const struct kernel_param_ops param_ops_int; extern int param_set_int(const char *val, const struct kernel_param *kp); extern int param_get_int(char *buffer, const struct kernel_param *kp); #define param_check_int(name, p) __param_check(name, p, int) -extern struct kernel_param_ops param_ops_uint; +extern const struct kernel_param_ops param_ops_uint; extern int param_set_uint(const char *val, const struct kernel_param *kp); extern int param_get_uint(char *buffer, const struct kernel_param *kp); #define param_check_uint(name, p) __param_check(name, p, unsigned int) -extern struct kernel_param_ops param_ops_long; +extern const struct kernel_param_ops param_ops_long; extern int param_set_long(const char *val, const struct kernel_param *kp); extern int param_get_long(char *buffer, const struct kernel_param *kp); #define param_check_long(name, p) __param_check(name, p, long) -extern struct kernel_param_ops param_ops_ulong; +extern const struct kernel_param_ops param_ops_ulong; extern int param_set_ulong(const char *val, const struct kernel_param *kp); extern int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) -extern struct kernel_param_ops param_ops_ullong; +extern const struct kernel_param_ops param_ops_ullong; extern int param_set_ullong(const char *val, const struct kernel_param *kp); extern int param_get_ullong(char *buffer, const struct kernel_param *kp); #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) -extern struct kernel_param_ops param_ops_charp; +extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ -extern struct kernel_param_ops param_ops_bool; +extern const struct kernel_param_ops param_ops_bool; extern int param_set_bool(const char *val, const struct kernel_param *kp); extern int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) __param_check(name, p, bool) -extern struct kernel_param_ops param_ops_invbool; +extern const struct kernel_param_ops param_ops_invbool; extern int param_set_invbool(const char *val, const struct kernel_param *kp); extern int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) /* An int, which can only be set like a bool (though it shows as an int). */ -extern struct kernel_param_ops param_ops_bint; +extern const struct kernel_param_ops param_ops_bint; extern int param_set_bint(const char *val, const struct kernel_param *kp); #define param_get_bint param_get_int #define param_check_bint param_check_int @@ -477,9 +477,9 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) -extern struct kernel_param_ops param_array_ops; +extern const struct kernel_param_ops param_array_ops; -extern struct kernel_param_ops param_ops_string; +extern const struct kernel_param_ops param_ops_string; extern int param_set_copystring(const char *val, const struct kernel_param *); extern int param_get_string(char *buffer, const struct kernel_param *kp); -- cgit v1.2.3 From d19f05d8a8fa221e5d5f4eaca0f3ca5874c990d3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 27 May 2015 11:09:38 +0930 Subject: kernel/params.c: generalize bool_enable_only This takes out the bool_enable_only implementation from the module loading code and generalizes it so that others can make use of it. Cc: Rusty Russell Cc: Jani Nikula Cc: Andrew Morton Cc: Kees Cook Cc: Tejun Heo Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: cocci@systeme.lip6.fr Signed-off-by: Luis R. Rodriguez Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 5d0f4d97997f..7e0079936396 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -427,6 +427,12 @@ extern int param_set_bool(const char *val, const struct kernel_param *kp); extern int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) __param_check(name, p, bool) +extern const struct kernel_param_ops param_ops_bool_enable_only; +extern int param_set_bool_enable_only(const char *val, + const struct kernel_param *kp); +/* getter is the same as for the regular bool */ +#define param_check_bool_enable_only param_check_bool + extern const struct kernel_param_ops param_ops_invbool; extern int param_set_invbool(const char *val, const struct kernel_param *kp); extern int param_get_invbool(char *buffer, const struct kernel_param *kp); -- cgit v1.2.3 From f36f3f2846b5578d62910ee0b6dbef59fdd1cfa4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 13:20:23 +0200 Subject: KVM: add "new" argument to kvm_arch_commit_memory_region This lets the function access the new memory slot without going through kvm_memslots and id_to_memslot. It will simplify the code when more than one address space will be supported. Unfortunately, the "const"ness of the new argument must be casted away in two places. Fixing KVM to accept const struct kvm_memory_slot pointers would require modifications in pretty much all architectures, and is left for later. Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8815f1dffb77..9bd3bc16be87 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -516,6 +516,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); -- cgit v1.2.3 From d9ef13c2b3983de8dd1373ef670799dbb6498122 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 May 2015 16:01:50 +0200 Subject: KVM: pass kvm_memory_slot to gfn_to_page_many_atomic The memory slot is already available from gfn_to_memslot_dirty_bitmap. Isn't it a shame to look it up again? Plus, it makes gfn_to_page_many_atomic agnostic of multiple VCPU address spaces. Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9bd3bc16be87..a8bcbc9c6078 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -526,8 +526,8 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); -int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, - int nr_pages); +int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From bfa1ce5f38938cc9e6c7f2d1011f88eba2b9e2b2 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 28 May 2015 11:40:54 +0200 Subject: bus: mvebu-mbus: add mv_mbus_dram_info_nooverlap() This commit introduces a variant of the mv_mbus_dram_info() function called mv_mbus_dram_info_nooverlap(). Both functions are used by Marvell drivers supporting devices doing DMA, and provide them a description the DRAM ranges that they need to configure their DRAM windows. The ranges provided by the mv_mbus_dram_info() function may overlap with the I/O windows if there is a lot (>= 4 GB) of RAM installed. This is not a problem for most of the DMA masters, except for the upcoming new CESA crypto driver because it does DMA to the SRAM, which is mapped through an I/O window. For this unit, we need to have DRAM ranges that do not overlap with the I/O windows. A first implementation done in commit 1737cac69369 ("bus: mvebu-mbus: make sure SDRAM CS for DMA don't overlap the MBus bridge window"), changed the information returned by mv_mbus_dram_info() to match this requirement. However, it broke the requirement of the other DMA masters than the DRAM ranges should have power of two sizes. To solve this situation, this commit introduces a new mv_mbus_dram_info_nooverlap() function, which returns the same information as mv_mbus_dram_info(), but guaranteed to not overlap with the I/O windows. In the end, it gives us two variants of the mv_mbus_dram_info*() functions: - The normal one, mv_mbus_dram_info(), which has been around for many years. This function returns the raw DRAM ranges, which are guaranteed to use power of two sizes, but will overlap with I/O windows. This function will therefore be used by all DMA masters (SATA, XOR, Ethernet, etc.) except the CESA crypto driver. - The new 'nooverlap' variant, mv_mbus_dram_info_nooverlap(). This function returns DRAM ranges after they have been "tweaked" to make sure they don't overlap with I/O windows. By doing this tweaking, we remove the power of two size guarantee. This variant will be used by the new CESA crypto driver. Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- include/linux/mbus.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 611b69fa8594..1f7bc630d225 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -54,11 +54,16 @@ struct mbus_dram_target_info */ #ifdef CONFIG_PLAT_ORION extern const struct mbus_dram_target_info *mv_mbus_dram_info(void); +extern const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void); #else static inline const struct mbus_dram_target_info *mv_mbus_dram_info(void) { return NULL; } +static inline const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void) +{ + return NULL; +} #endif int mvebu_mbus_save_cpu_target(u32 *store_addr); -- cgit v1.2.3 From 85e6f09785bd06f0510bdb00c40772c7f8da3c43 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Tue, 19 May 2015 16:16:14 +0100 Subject: ARM: 8367/1: sa1100: prepare for moving irq driver to drivers/irqchip Prepare for moving sa1100 irq driver to irqchip infrastructure - split sa1100_init_irq into helper code and irq parts. Signed-off-by: Dmitry Eremin-Solenikov Acked-by: Thomas Gleixner Signed-off-by: Russell King --- include/linux/irqchip/irq-sa11x0.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/irqchip/irq-sa11x0.h (limited to 'include/linux') diff --git a/include/linux/irqchip/irq-sa11x0.h b/include/linux/irqchip/irq-sa11x0.h new file mode 100644 index 000000000000..15db6829c1e4 --- /dev/null +++ b/include/linux/irqchip/irq-sa11x0.h @@ -0,0 +1,17 @@ +/* + * Generic IRQ handling for the SA11x0. + * + * Copyright (C) 2015 Dmitry Eremin-Solenikov + * Copyright (C) 1999-2001 Nicolas Pitre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H +#define __INCLUDE_LINUX_IRQCHIP_IRQ_SA11x0_H + +void __init sa11x0_init_irq_nodt(int irq_start, resource_size_t io_start); + +#endif -- cgit v1.2.3 From 391b459d7623b26153c7d8c200e8d213440a7d48 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 24 Apr 2015 12:41:56 +0300 Subject: of: Move OF flags to be visible even when !CONFIG_OF We need those to be visible even when compiling with CONFIG_OF disabled, since even the empty of_node_*_flag() method use the flag. Signed-off-by: Pantelis Antoniou Acked-by: Wolfram Sang Signed-off-by: Rob Herring --- include/linux/of.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..9b32cbdf0230 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -120,6 +120,12 @@ extern struct device_node *of_aliases; extern struct device_node *of_stdout; extern raw_spinlock_t devtree_lock; +/* flag descriptions (need to be visible even when !CONFIG_OF) */ +#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ +#define OF_DETACHED 2 /* node has been detached from the device tree */ +#define OF_POPULATED 3 /* device already created for the node */ +#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ + #ifdef CONFIG_OF static inline bool is_of_node(struct fwnode_handle *fwnode) { @@ -217,12 +223,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -/* flag descriptions */ -#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ -#define OF_DETACHED 2 /* node has been detached from the device tree */ -#define OF_POPULATED 3 /* device already created for the node */ -#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ - #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) -- cgit v1.2.3 From 31712c98f8180c7bd3f33eefa461f0e1142e18f5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 4 May 2015 19:42:19 +0200 Subject: of: Grammar s/property exist/property exists/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 9b32cbdf0230..a3eb9d1e5800 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -815,7 +815,7 @@ static inline int of_property_read_string_index(struct device_node *np, * @propname: name of the property to be searched. * * Search for a property in a device node. - * Returns true if the property exist false otherwise. + * Returns true if the property exists false otherwise. */ static inline bool of_property_read_bool(const struct device_node *np, const char *propname) -- cgit v1.2.3 From 307c858bc245da5229b30186546590e1d472fc8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 28 May 2015 16:08:02 +0200 Subject: usb: phy: add static inline wrapper for devm_usb_get_phy_by_node The newly introduced devm_usb_get_phy_by_node function only has an extern declaration, but no alternative for the case that CONFIG_USB_PHY is disabled, which leads to a build error when it is used anyway: drivers/power/twl4030_charger.c: In function 'twl4030_bci_probe': drivers/power/twl4030_charger.c:648:23: error: implicit declaration of function 'devm_usb_get_phy_by_node' [-Werror=implicit-function-declaration] bci->transceiver = devm_usb_get_phy_by_node( This adds the wrapper in the same way that we have one for all other usb-phy API functions. Signed-off-by: Arnd Bergmann Fixes: e842b84c8e7 ("usb: phy: Add interface to get phy give of device_node.") Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index 8ed1e29ef329..e39f251cf861 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -240,6 +240,12 @@ static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, return ERR_PTR(-ENXIO); } +static inline struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, + struct device_node *node, struct notifier_block *nb) +{ + return ERR_PTR(-ENXIO); +} + static inline void usb_put_phy(struct usb_phy *x) { } -- cgit v1.2.3 From 9626b6993b2e6faf047d2d96958e8474edc9c7a5 Mon Sep 17 00:00:00 2001 From: jilai wang Date: Fri, 10 Apr 2015 16:15:59 -0400 Subject: firmware: qcom: scm: Add HDCP Support HDCP driver needs to check if secure environment supports HDCP. If it's supported, then it requires to program some registers through SCM. Add qcom_scm_hdcp_available and qcom_scm_hdcp_req to support these requirements. Signed-off-by: Jilai Wang Signed-off-by: Kumar Gala --- include/linux/qcom_scm.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index d7a974d5f57c..6e7d5ec65838 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify @@ -16,6 +16,17 @@ extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); +#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 + +struct qcom_scm_hdcp_req { + u32 addr; + u32 val; +}; + +extern bool qcom_scm_hdcp_available(void); +extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, + u32 *resp); + #define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 -- cgit v1.2.3 From aad653a0bc09dd4ebcb5579f9f835bbae9ef2ba3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 19 May 2015 15:58:37 +1000 Subject: block: discard bdi_unregister() in favour of bdi_destroy() bdi_unregister() now contains very little functionality. It contains a "WARN_ON" if bdi->dev is NULL. This warning is of no real consequence as bdi->dev isn't needed by anything else in the function, and it triggers if blk_cleanup_queue() -> bdi_destroy() is called before bdi_unregister, which happens since Commit: 6cd18e711dd8 ("block: destroy bdi before blockdev is unregistered.") So this isn't wanted. It also calls bdi_set_min_ratio(). This needs to be called after writes through the bdi have all been flushed, and before the bdi is destroyed. Calling it early is better than calling it late as it frees up a global resource. Calling it immediately after bdi_wb_shutdown() in bdi_destroy() perfectly fits these requirements. So bdi_unregister() can be discarded with the important content moved to bdi_destroy(), as can the writeback_bdi_unregister event which is already not used. Reported-by: Mike Snitzer Cc: stable@vger.kernel.org (v4.0) Fixes: c4db59d31e39 ("fs: don't reassign dirty inodes to default_backing_dev_info") Fixes: 6cd18e711dd8 ("block: destroy bdi before blockdev is unregistered.") Acked-by: Peter Zijlstra (Intel) Acked-by: Dan Williams Tested-by: Nicholas Moulin Signed-off-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aff923ae8c4b..d87d8eced064 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -116,7 +116,6 @@ __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); -void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); -- cgit v1.2.3 From 3386e0fa905c31bf1dafa2cbe2ecceb7e5ce2e66 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Wed, 6 May 2015 20:09:09 +0200 Subject: of: add helper function to retrive match data It's a common operation for device drivers to retrive the data member from of_device_id struct in their probe function. Most driver end up doing: const struct of_device_id *match; match = of_match_device(driver_of_match, &pdev->dev); driver->data = match->data; With the of_device_get_match_data helper function all this can done in one go. Signed-off-by: Joachim Eastwood [robh: add missing inline to dummmy declaration] Signed-off-by: Rob Herring --- include/linux/of_device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 22801b10cef5..4c508549833a 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -33,6 +33,8 @@ extern int of_device_add(struct platform_device *pdev); extern int of_device_register(struct platform_device *ofdev); extern void of_device_unregister(struct platform_device *ofdev); +extern const void *of_device_get_match_data(const struct device *dev); + extern ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len); @@ -65,6 +67,11 @@ static inline int of_driver_match_device(struct device *dev, static inline void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) { } +static inline const void *of_device_get_match_data(const struct device *dev) +{ + return NULL; +} + static inline int of_device_get_modalias(struct device *dev, char *str, ssize_t len) { -- cgit v1.2.3 From 3b1a2c97210b1edd1a999ff8c1f72ab28f7609f7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 May 2015 16:33:56 +0200 Subject: of/fdt: Make fdt blob input parameters of unflatten functions const Operations to unflatten fdt blobs never modify the input blobs, hence make them const. Now we no longer need to cast arbitrary const data to "void *" when calling of_fdt_unflatten_tree(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- include/linux/of_fdt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 587ee507965d..0cf217d404ce 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -37,7 +37,7 @@ extern bool of_fdt_is_big_endian(const void *blob, unsigned long node); extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); -extern void of_fdt_unflatten_tree(unsigned long *blob, +extern void of_fdt_unflatten_tree(const unsigned long *blob, struct device_node **mynodes); /* TBD: Temporary export of fdt globals - remove when code fully merged */ -- cgit v1.2.3 From 3c6296f716ebef704b76070d90567ab4faa8462c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 28 May 2015 13:21:34 -0400 Subject: ring-buffer: Remove useless unused tracing_off_permanent() The tracing_off_permanent() call is a way to disable all ring_buffers. Nothing uses it and nothing should use it, as tracing_off() and friends are better, as they disable the ring buffers related to tracing. The tracing_off_permanent() even disabled non tracing ring buffers. This is a bit drastic, and was added to handle NMIs doing outputs that could corrupt the ring buffer when only tracing used them. It is now obsolete and adds a little overhead, it should be removed. Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3a5b48e52a9e..d948718a83d7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -532,12 +532,6 @@ bool mac_pton(const char *s, u8 *mac); * * Most likely, you want to use tracing_on/tracing_off. */ -#ifdef CONFIG_RING_BUFFER -/* trace_off_permanent stops recording with no way to bring it back */ -void tracing_off_permanent(void); -#else -static inline void tracing_off_permanent(void) { } -#endif enum ftrace_dump_mode { DUMP_NONE, -- cgit v1.2.3 From 80188b0d77d7426b494af739ac129e0e684acb84 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 29 May 2015 07:39:34 +1000 Subject: percpu_counter: batch size aware __percpu_counter_compare() XFS uses non-stanard batch sizes for avoiding frequent global counter updates on it's allocated inode counters, as they increment or decrement in batches of 64 inodes. Hence the standard percpu counter batch of 32 means that the counter is effectively a global counter. Currently Xfs uses a batch size of 128 so that it doesn't take the global lock on every single modification. However, Xfs also needs to compare accurately against zero, which means we need to use percpu_counter_compare(), and that has a hard-coded batch size of 32, and hence will spuriously fail to detect when it is supposed to use precise comparisons and hence the accounting goes wrong. Add __percpu_counter_compare() to take a custom batch size so we can use it sanely in XFS and factor percpu_counter_compare() to use it. Signed-off-by: Dave Chinner Acked-by: Tejun Heo Signed-off-by: Dave Chinner --- include/linux/percpu_counter.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 50e50095c8d1..84a109449610 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); +int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); + +static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); +} static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) return 0; } +static inline int +__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) +{ + return percpu_counter_compare(fbc, rhs); +} + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { -- cgit v1.2.3 From 0040b933187b11f78e83dd162a31d64a46be4e37 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 11:52:23 -0700 Subject: f2fs: add missing version info in superblock The mkfs.f2fs remains kernel version in superblock, but f2fs module has not added that so far. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 591f8c3ef410..8d345c24bcf7 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -50,6 +50,8 @@ #define MAX_ACTIVE_NODE_LOGS 8 #define MAX_ACTIVE_DATA_LOGS 8 +#define VERSION_LEN 256 + /* * For superblock */ @@ -86,6 +88,9 @@ struct f2fs_super_block { __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ __le32 cp_payload; + __u8 version[VERSION_LEN]; /* the kernel version */ + __u8 init_version[VERSION_LEN]; /* the initial kernel version */ + __u8 reserved[892]; /* valid reserved region */ } __packed; /* -- cgit v1.2.3 From 76f105a2dbcd47509bac6ba8d94cb3759a3e6e9d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Apr 2015 15:10:36 -0700 Subject: f2fs: add feature facility in superblock This patch introduces a feature in superblock, which will indicate any new features for f2fs. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8d345c24bcf7..d44e97f2b98e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -90,7 +90,8 @@ struct f2fs_super_block { __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ - __u8 reserved[892]; /* valid reserved region */ + __le32 feature; /* defined features */ + __u8 reserved[888]; /* valid reserved region */ } __packed; /* -- cgit v1.2.3 From cde4de1205770514005663d70a9a7d81cb555085 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Apr 2015 13:57:51 -0700 Subject: f2fs crypto: declare some definitions for f2fs encryption feature This definitions will be used by inode and superblock for encyption. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d44e97f2b98e..920408a21ffd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -91,7 +91,9 @@ struct f2fs_super_block { __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ __le32 feature; /* defined features */ - __u8 reserved[888]; /* valid reserved region */ + __u8 encryption_level; /* versioning level for encryption */ + __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ + __u8 reserved[871]; /* valid reserved region */ } __packed; /* -- cgit v1.2.3 From f8df88081183bd4d3c461c617c3519445eb85642 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 27 May 2015 23:06:30 +0900 Subject: extcon: Remove optional print_name() function pointer of extcon_dev This patch removes the optional print_name() function pointer included in 'struct extcon_dev' because the extcon must maintain the consistent name of extcon device on sysfs instead of inconsistent name. After merged patch[1], extcon can maintain the consistent name of extcon device without any hard-coded device name. [1] https://lkml.org/lkml/2015/4/27/258 Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index be9652b3a154..a7b224b20ecc 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -83,8 +83,6 @@ struct extcon_cable; * be attached simulataneously. {0x7, 0} is equivalent to * {0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there * can be no simultaneous connections. - * @print_name: An optional callback to override the method to print the - * name of the extcon device. * @print_state: An optional callback to override the method to print the * status of the extcon device. * @dev: Device of this extcon. @@ -111,7 +109,6 @@ struct extcon_dev { const u32 *mutually_exclusive; /* Optional callbacks to override class functions */ - ssize_t (*print_name)(struct extcon_dev *edev, char *buf); ssize_t (*print_state)(struct extcon_dev *edev, char *buf); /* Internal data. Please do not set. */ -- cgit v1.2.3 From c80ef9e0c021ff86771fdd72583c75d8f7b6a720 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 May 2015 10:52:59 +0200 Subject: cgroup: add seq_file forward declaration for struct cftype Recent header file changes for cgroup caused lots of warnings about a missing struct seq_file form declaration for every inclusion of include/linux/cgroup-defs.h. As some files are built with -Werror, this leads to build failure like: from /git/arm-soc/drivers/gpu/drm/tilcdc/tilcdc_crtc.c:18: /git/arm-soc/include/linux/cgroup-defs.h:354:25: error: 'struct seq_file' declared inside parameter list [-Werror] cc1: all warnings being treated as errors make[6]: *** [drivers/gpu/drm/tilcdc/tilcdc_crtc.o] Error 1 This patch adds the declaration, which resolves both the warnings and the drm failure. tj: Moved it where other type declarations are. Signed-off-by: Arnd Bergmann Fixes: b4a04ab7a37b ("cgroup: separate out include/linux/cgroup-defs.h") Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7d83d7f73420..26d1cea7929f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -26,6 +26,7 @@ struct cgroup_taskset; struct kernfs_node; struct kernfs_ops; struct kernfs_open_file; +struct seq_file; #define MAX_CGROUP_TYPE_NAMELEN 32 #define MAX_CGROUP_ROOT_NAMELEN 64 -- cgit v1.2.3 From e548ca4ee4595f65b262661d166310ad8a149bec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 May 2015 13:11:32 -0600 Subject: block: don't honor chunk sizes for data-less IO We don't need to honor chunk sizes for IO that doesn't carry any data. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9ded80da2c16..ccaa9aecd593 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -903,7 +903,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors) + if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) return blk_queue_get_max_sectors(q, rq->cmd_flags); return min(blk_max_size_offset(q, blk_rq_pos(rq)), -- cgit v1.2.3 From 19bdb6e4ec071bc49a9871b41e6a59a1657ed365 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 26 May 2015 15:11:44 -0600 Subject: PCI: Move pci_ari_enabled() to global header pci_ari_enabled() is useful outside of drivers/pci, particularly for deriving INTx routing via ACPI _PRT, so move it to the global header. Also convert to bool return. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Reviewed-by: Don Dutile Acked-by: Rafael J. Wysocki --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..2925561a8f1e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1905,4 +1905,15 @@ static inline bool pci_is_dev_assigned(struct pci_dev *pdev) { return (pdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) == PCI_DEV_FLAGS_ASSIGNED; } + +/** + * pci_ari_enabled - query ARI forwarding status + * @bus: the PCI bus + * + * Returns true if ARI forwarding is enabled. + */ +static inline bool pci_ari_enabled(struct pci_bus *bus) +{ + return bus->self && bus->self->ari_enabled; +} #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 3a9ad0b4fdcd57f775d3615004c8c64c021a9e7d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 May 2015 17:23:51 -0700 Subject: PCI: Add pci_bus_addr_t David Ahern reported that d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") fails to boot on sparc/T5-8: pci 0000:06:00.0: reg 0x184: can't handle BAR above 4GB (bus address 0x110204000) The problem is that sparc64 assumed that dma_addr_t only needed to hold DMA addresses, i.e., bus addresses returned via the DMA API (dma_map_single(), etc.), while the PCI core assumed dma_addr_t could hold *any* bus address, including raw BAR values. On sparc64, all DMA addresses fit in 32 bits, so dma_addr_t is a 32-bit type. However, BAR values can be 64 bits wide, so they don't fit in a dma_addr_t. d63e2e1f3df9 added new checking that tripped over this mismatch. Add pci_bus_addr_t, which is wide enough to hold any PCI bus address, including both raw BAR values and DMA addresses. This will be 64 bits on 64-bit platforms and on platforms with a 64-bit dma_addr_t. Then dma_addr_t only needs to be wide enough to hold addresses from the DMA API. [bhelgaas: changelog, bugzilla, Kconfig to ensure pci_bus_addr_t is at least as wide as dma_addr_t, documentation] Fixes: d63e2e1f3df9 ("sparc/PCI: Clip bridge windows to fit in upstream windows") Fixes: 23b13bc76f35 ("PCI: Fail safely if we can't handle BARs larger than 4GB") Link: http://lkml.kernel.org/r/CAE9FiQU1gJY1LYrxs+ma5LCTEEe4xmtjRG0aXJ9K_Tsu+m9Wuw@mail.gmail.com Link: http://lkml.kernel.org/r/1427857069-6789-1-git-send-email-yinghai@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=96231 Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: David S. Miller CC: stable@vger.kernel.org # v3.19+ --- include/linux/pci.h | 12 +++++++++--- include/linux/types.h | 12 ++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..956f74bad37a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -577,9 +577,15 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); +#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT +typedef u64 pci_bus_addr_t; +#else +typedef u32 pci_bus_addr_t; +#endif + struct pci_bus_region { - dma_addr_t start; - dma_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; }; struct pci_dynids { @@ -1128,7 +1134,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); -static inline dma_addr_t pci_bus_address(struct pci_dev *pdev, int bar) +static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { struct pci_bus_region region; diff --git a/include/linux/types.h b/include/linux/types.h index 59698be03490..8715287c3b1f 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -139,12 +139,20 @@ typedef unsigned long blkcnt_t; */ #define pgoff_t unsigned long -/* A dma_addr_t can hold any valid DMA or bus address for the platform */ +/* + * A dma_addr_t can hold any valid DMA address, i.e., any address returned + * by the DMA API. + * + * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 + * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, + * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, + * so they don't care about the size of the actual bus addresses. + */ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT typedef u64 dma_addr_t; #else typedef u32 dma_addr_t; -#endif /* dma_addr_t */ +#endif typedef unsigned __bitwise__ gfp_t; typedef unsigned __bitwise__ fmode_t; -- cgit v1.2.3 From db874c7e10557f8f1af9a6fb1ec6589ae06f349c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 29 May 2015 09:54:02 -0700 Subject: PM / wakeirq: Fix typo in prototype for dev_pm_set_dedicated_wake_irq Looks like I only built test the dev_pm_set_wake_irq and not the dev_pm_set_dedicated_wake_irq case on x86. Turns out there's a typo for the dev_pm_set_dedicated_wake_irq prototype that causes a build error if CONFIG_COMPILE_TEST and CONFIG_MMC_OMAP_HS are selected. Reported-by: Jim Davis Signed-off-by: Tony Lindgren Reviewed-by: Felipe Balbi Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeirq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index 4046fa1b7d25..cd5b62db9084 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -30,8 +30,7 @@ static inline int dev_pm_set_wake_irq(struct device *dev, int irq) return 0; } -static inline int dev_pm_set_dedicated__wake_irq(struct device *dev, - int irq) +static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) { return 0; } -- cgit v1.2.3 From 5790cf3c00c2f92aacba348e13f8a9a8f5dd96bd Mon Sep 17 00:00:00 2001 From: Mathieu Olivari Date: Wed, 27 May 2015 11:02:47 -0700 Subject: stmmac: add phy-handle support to the platform layer On stmmac driver, PHY specification in device-tree was done using the non-standard property "snps,phy-addr". Specifying a PHY on a different MDIO bus that the one within the stmmac controller doesn't seem to be possible when device-tree is used. This change adds support for the phy-handle property, as specified in Documentation/devicetree/bindings/net/ethernet.txt. Signed-off-by: Mathieu Olivari Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7f484a239f53..c735f5c91eea 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -99,6 +99,7 @@ struct plat_stmmacenet_data { int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; + struct device_node *phy_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From f4fb874cf076f9eafdd15c0a88cd0f0397b95e43 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 27 May 2015 21:07:26 -0400 Subject: if_vlan: fix vlaue -> value typo Fixes "vlaue" for "value" in include/linux/if_vlan.h. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b9ab677c0c0a..a40d29846ac2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -416,7 +416,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, /** * __vlan_get_tag - get the VLAN ID that is part of the payload * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not of VLAN type */ @@ -435,7 +435,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[] * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if @skb->vlan_tci is not set correctly */ @@ -456,7 +456,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, /** * vlan_get_tag - get the VLAN ID from the skb * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not VLAN tagged */ -- cgit v1.2.3 From 64ffaa2159b752e6c263dc57eaaaed7367d37493 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:38 +0300 Subject: net/mlx5_core,mlx5_ib: Do not use vmap() on coherent memory As David Daney pointed in mlx4_core driver [1], mlx5_core is also misusing the DMA-API. This patch is removing the code that vmap() memory allocated by dma_alloc_coherent(). After this patch, users of this drivers might fail allocating resources on memory fragmeneted systems. This will be fixed later on. [1] - https://patchwork.ozlabs.org/patch/458531/ CC: David Daney Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a90e7523dc2..c4cf25ffcc16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -334,8 +334,6 @@ struct mlx5_buf_list { struct mlx5_buf { struct mlx5_buf_list direct; - struct mlx5_buf_list *page_list; - int nbufs; int npages; int size; u8 page_shift; @@ -586,11 +584,7 @@ struct mlx5_pas { static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { - if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) return buf->direct.buf + offset; - else - return buf->page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); } extern struct workqueue_struct *mlx5_core_wq; @@ -669,8 +663,7 @@ void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); -- cgit v1.2.3 From db058a186f98b057c19c42f7b10d9a96fd3b5d59 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:39 +0300 Subject: net/mlx5_core: Set irq affinity hints Preparation for upcoming ethernet driver. - Move msix array from eq_table struct to priv since its not related to eq_table - Intorduce irq_info struct to hold all irq information - Move name from mlx5_eq to irq_info struct since it is irq property. - Set IRQ affinity hints Signed-off-by: Achiad Shochat Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4cf25ffcc16..9e8979502826 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -85,7 +85,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 32 + MLX5_MAX_IRQ_NAME = 32 }; enum { @@ -349,7 +349,6 @@ struct mlx5_eq { u8 eqn; int nent; u64 mask; - char name[MLX5_MAX_EQ_NAME]; struct list_head list; int index; struct mlx5_rsc_debug *dbg; @@ -412,7 +411,6 @@ struct mlx5_eq_table { struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; - struct msix_entry *msix_arr; int num_comp_vectors; /* protect EQs list */ @@ -465,9 +463,16 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; + struct msix_entry *msix_arr; + struct mlx5_irq_info *irq_info; struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); -- cgit v1.2.3 From e281682bf29438848daac11627216bceb1507b71 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:40 +0300 Subject: net/mlx5_core: HW data structs/types definitions cleanup mlx5_ifc.h was heavily modified here since it is now generated by a script from the device specification (PRM rev 0.25). This specification is backward compatible to existing hardware. Some structures/fields were added here in order to enable the Ethernet functionality of the driver. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 113 +- include/linux/mlx5/driver.h | 4 +- include/linux/mlx5/mlx5_ifc.h | 6608 ++++++++++++++++++++++++++++++++++++++++- include/linux/mlx5/qp.h | 25 + 4 files changed, 6650 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index abf65c790421..feebed7b392b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -35,6 +35,7 @@ #include #include +#include #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 @@ -70,6 +71,14 @@ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) +#define MLX5_SET_TO_ONES(typ, p, fld) do { \ + BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ + *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ + cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ + (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ + << __mlx5_dw_bit_off(typ, fld))); \ +} while (0) + #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) @@ -264,6 +273,7 @@ enum { MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, @@ -541,6 +551,10 @@ struct mlx5_cmd_prot_block { u8 sig; }; +enum { + MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, +}; + struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; @@ -554,13 +568,22 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[17]; + u8 rsvd0[4]; + u8 lro_tcppsh_abort_dupack; + u8 lro_min_ttl; + __be16 lro_tcp_win; + __be32 lro_ack_seq_num; + __be32 rss_hash_result; + u8 rss_hash_type; u8 ml_path; - u8 rsvd20[4]; + u8 rsvd20[2]; + __be16 check_sum; __be16 slid; __be32 flags_rqpn; - u8 rsvd28[4]; - __be32 srqn; + u8 hds_ip_ext; + u8 l4_hdr_type_etc; + __be16 vlan_info; + __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; @@ -571,6 +594,40 @@ struct mlx5_cqe64 { u8 op_own; }; +static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +{ + return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; +} + +static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_hdr_type_etc >> 4) & 0x7; +} + +static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) +{ + return !!(cqe->l4_hdr_type_etc & 0x1); +} + +enum { + CQE_L4_HDR_TYPE_NONE = 0x0, + CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, + CQE_L4_HDR_TYPE_UDP = 0x2, + CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, + CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, +}; + +enum { + CQE_RSS_HTYPE_IP = 0x3 << 6, + CQE_RSS_HTYPE_L4 = 0x3 << 2, +}; + +enum { + CQE_L2_OK = 1 << 0, + CQE_L3_OK = 1 << 1, + CQE_L4_OK = 1 << 2, +}; + struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; @@ -996,4 +1053,52 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; +#define MLX5_CMD_OP_MAX 0x920 + +enum { + VPORT_STATE_DOWN = 0x0, + VPORT_STATE_UP = 0x1, +}; + +enum { + MLX5_L3_PROT_TYPE_IPV4 = 0, + MLX5_L3_PROT_TYPE_IPV6 = 1, +}; + +enum { + MLX5_L4_PROT_TYPE_TCP = 0, + MLX5_L4_PROT_TYPE_UDP = 1, +}; + +enum { + MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, + MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, + MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, + MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, + MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, +}; + +enum { + MLX5_MATCH_OUTER_HEADERS = 1 << 0, + MLX5_MATCH_MISC_PARAMETERS = 1 << 1, + MLX5_MATCH_INNER_HEADERS = 1 << 2, + +}; + +enum { + MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, +}; + +enum { + MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT = 0, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE = 1, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, +}; + +enum { + MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9e8979502826..3fd4fdc1ba16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -44,7 +44,6 @@ #include #include -#include enum { MLX5_BOARD_ID_LEN = 64, @@ -278,7 +277,6 @@ struct mlx5_general_caps { u8 log_max_mkey; u8 log_max_pd; u8 log_max_srq; - u8 log_max_strq; u8 log_max_mrw_sz; u8 log_max_bsf_list_size; u8 log_max_klm_list_size; @@ -664,6 +662,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cb3ad17edd1f..b27e9f6e090a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -28,11 +28,44 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - */ - +*/ #ifndef MLX5_IFC_H #define MLX5_IFC_H +enum { + MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS = 0x0, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED = 0x1, + MLX5_EVENT_TYPE_CODING_COMMUNICATION_ESTABLISHED = 0x2, + MLX5_EVENT_TYPE_CODING_SEND_QUEUE_DRAINED = 0x3, + MLX5_EVENT_TYPE_CODING_LAST_WQE_REACHED = 0x13, + MLX5_EVENT_TYPE_CODING_SRQ_LIMIT = 0x14, + MLX5_EVENT_TYPE_CODING_DCT_ALL_CONNECTIONS_CLOSED = 0x1c, + MLX5_EVENT_TYPE_CODING_DCT_ACCESS_KEY_VIOLATION = 0x1d, + MLX5_EVENT_TYPE_CODING_CQ_ERROR = 0x4, + MLX5_EVENT_TYPE_CODING_LOCAL_WQ_CATASTROPHIC_ERROR = 0x5, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATION_FAILED = 0x7, + MLX5_EVENT_TYPE_CODING_PAGE_FAULT_EVENT = 0xc, + MLX5_EVENT_TYPE_CODING_INVALID_REQUEST_LOCAL_WQ_ERROR = 0x10, + MLX5_EVENT_TYPE_CODING_LOCAL_ACCESS_VIOLATION_WQ_ERROR = 0x11, + MLX5_EVENT_TYPE_CODING_LOCAL_SRQ_CATASTROPHIC_ERROR = 0x12, + MLX5_EVENT_TYPE_CODING_INTERNAL_ERROR = 0x8, + MLX5_EVENT_TYPE_CODING_PORT_STATE_CHANGE = 0x9, + MLX5_EVENT_TYPE_CODING_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_CODING_REMOTE_CONFIGURATION_PROTOCOL_EVENT = 0x19, + MLX5_EVENT_TYPE_CODING_DOORBELL_BLUEFLAME_CONGESTION_EVENT = 0x1a, + MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT = 0x1b, + MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT = 0x1f, + MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa, + MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb +}; + +enum { + MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, + MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, + MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, + MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 +}; + enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, @@ -43,6 +76,8 @@ enum { MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, + MLX5_CMD_OP_QUERY_ISSI = 0x10a, + MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -66,6 +101,7 @@ enum { MLX5_CMD_OP_2ERR_QP = 0x507, MLX5_CMD_OP_2RST_QP = 0x50a, MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_SQD_RTS_QP = 0x50c, MLX5_CMD_OP_INIT2INIT_QP = 0x50e, MLX5_CMD_OP_CREATE_PSV = 0x600, MLX5_CMD_OP_DESTROY_PSV = 0x601, @@ -73,7 +109,10 @@ enum { MLX5_CMD_OP_DESTROY_SRQ = 0x701, MLX5_CMD_OP_QUERY_SRQ = 0x702, MLX5_CMD_OP_ARM_RQ = 0x703, - MLX5_CMD_OP_RESIZE_SRQ = 0x704, + MLX5_CMD_OP_CREATE_XRC_SRQ = 0x705, + MLX5_CMD_OP_DESTROY_XRC_SRQ = 0x706, + MLX5_CMD_OP_QUERY_XRC_SRQ = 0x707, + MLX5_CMD_OP_ARM_XRC_SRQ = 0x708, MLX5_CMD_OP_CREATE_DCT = 0x710, MLX5_CMD_OP_DESTROY_DCT = 0x711, MLX5_CMD_OP_DRAIN_DCT = 0x712, @@ -85,8 +124,12 @@ enum { MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, - MLX5_CMD_OP_QUERY_RCOE_ADDRESS = 0x760, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, MLX5_CMD_OP_SET_ROCE_ADDRESS = 0x761, + MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT = 0x762, + MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, + MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764, + MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765, MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770, MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, @@ -98,7 +141,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, @@ -106,23 +149,22 @@ enum { MLX5_CMD_OP_NOP = 0x80d, MLX5_CMD_OP_ALLOC_XRCD = 0x80e, MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, - MLX5_CMD_OP_SET_BURST_SIZE = 0x812, - MLX5_CMD_OP_QUERY_BURST_SZIE = 0x813, - MLX5_CMD_OP_ACTIVATE_TRACER = 0x814, - MLX5_CMD_OP_DEACTIVATE_TRACER = 0x815, - MLX5_CMD_OP_CREATE_SNIFFER_RULE = 0x820, - MLX5_CMD_OP_DESTROY_SNIFFER_RULE = 0x821, - MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x822, - MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x823, - MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x824, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, + MLX5_CMD_OP_QUERY_CONG_STATUS = 0x822, + MLX5_CMD_OP_MODIFY_CONG_STATUS = 0x823, + MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x824, + MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x825, + MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x826, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT = 0x827, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT = 0x828, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY = 0x829, + MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY = 0x82a, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, MLX5_CMD_OP_QUERY_TIR = 0x903, - MLX5_CMD_OP_CREATE_TIS = 0x912, - MLX5_CMD_OP_MODIFY_TIS = 0x913, - MLX5_CMD_OP_DESTROY_TIS = 0x914, - MLX5_CMD_OP_QUERY_TIS = 0x915, MLX5_CMD_OP_CREATE_SQ = 0x904, MLX5_CMD_OP_MODIFY_SQ = 0x905, MLX5_CMD_OP_DESTROY_SQ = 0x906, @@ -135,9 +177,430 @@ enum { MLX5_CMD_OP_MODIFY_RMP = 0x90d, MLX5_CMD_OP_DESTROY_RMP = 0x90e, MLX5_CMD_OP_QUERY_RMP = 0x90f, - MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x910, - MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x911, - MLX5_CMD_OP_MAX = 0x911 + MLX5_CMD_OP_CREATE_TIS = 0x912, + MLX5_CMD_OP_MODIFY_TIS = 0x913, + MLX5_CMD_OP_DESTROY_TIS = 0x914, + MLX5_CMD_OP_QUERY_TIS = 0x915, + MLX5_CMD_OP_CREATE_RQT = 0x916, + MLX5_CMD_OP_MODIFY_RQT = 0x917, + MLX5_CMD_OP_DESTROY_RQT = 0x918, + MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, + MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, + MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, + MLX5_CMD_OP_CREATE_FLOW_GROUP = 0x933, + MLX5_CMD_OP_DESTROY_FLOW_GROUP = 0x934, + MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, + MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, + MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 +}; + +struct mlx5_ifc_flow_table_fields_supported_bits { + u8 outer_dmac[0x1]; + u8 outer_smac[0x1]; + u8 outer_ether_type[0x1]; + u8 reserved_0[0x1]; + u8 outer_first_prio[0x1]; + u8 outer_first_cfi[0x1]; + u8 outer_first_vid[0x1]; + u8 reserved_1[0x1]; + u8 outer_second_prio[0x1]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0x1]; + u8 reserved_2[0x1]; + u8 outer_sip[0x1]; + u8 outer_dip[0x1]; + u8 outer_frag[0x1]; + u8 outer_ip_protocol[0x1]; + u8 outer_ip_ecn[0x1]; + u8 outer_ip_dscp[0x1]; + u8 outer_udp_sport[0x1]; + u8 outer_udp_dport[0x1]; + u8 outer_tcp_sport[0x1]; + u8 outer_tcp_dport[0x1]; + u8 outer_tcp_flags[0x1]; + u8 outer_gre_protocol[0x1]; + u8 outer_gre_key[0x1]; + u8 outer_vxlan_vni[0x1]; + u8 reserved_3[0x5]; + u8 source_eswitch_port[0x1]; + + u8 inner_dmac[0x1]; + u8 inner_smac[0x1]; + u8 inner_ether_type[0x1]; + u8 reserved_4[0x1]; + u8 inner_first_prio[0x1]; + u8 inner_first_cfi[0x1]; + u8 inner_first_vid[0x1]; + u8 reserved_5[0x1]; + u8 inner_second_prio[0x1]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0x1]; + u8 reserved_6[0x1]; + u8 inner_sip[0x1]; + u8 inner_dip[0x1]; + u8 inner_frag[0x1]; + u8 inner_ip_protocol[0x1]; + u8 inner_ip_ecn[0x1]; + u8 inner_ip_dscp[0x1]; + u8 inner_udp_sport[0x1]; + u8 inner_udp_dport[0x1]; + u8 inner_tcp_sport[0x1]; + u8 inner_tcp_dport[0x1]; + u8 inner_tcp_flags[0x1]; + u8 reserved_7[0x9]; + + u8 reserved_8[0x40]; +}; + +struct mlx5_ifc_flow_table_prop_layout_bits { + u8 ft_support[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x2]; + u8 log_max_ft_size[0x6]; + u8 reserved_2[0x10]; + u8 max_ft_level[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x18]; + u8 log_max_ft_num[0x8]; + + u8 reserved_5[0x18]; + u8 log_max_destination[0x8]; + + u8 reserved_6[0x18]; + u8 log_max_flow[0x8]; + + u8 reserved_7[0x40]; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_bitmask_support; +}; + +struct mlx5_ifc_odp_per_transport_service_cap_bits { + u8 send[0x1]; + u8 receive[0x1]; + u8 write[0x1]; + u8 read[0x1]; + u8 reserved_0[0x1]; + u8 srq_receive[0x1]; + u8 reserved_1[0x1a]; +}; + +struct mlx5_ifc_fte_match_set_lyr_2_4_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 ethertype[0x10]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 first_prio[0x3]; + u8 first_cfi[0x1]; + u8 first_vid[0xc]; + + u8 ip_protocol[0x8]; + u8 ip_dscp[0x6]; + u8 ip_ecn[0x2]; + u8 vlan_tag[0x1]; + u8 reserved_0[0x1]; + u8 frag[0x1]; + u8 reserved_1[0x4]; + u8 tcp_flags[0x9]; + + u8 tcp_sport[0x10]; + u8 tcp_dport[0x10]; + + u8 reserved_2[0x20]; + + u8 udp_sport[0x10]; + u8 udp_dport[0x10]; + + u8 src_ip[4][0x20]; + + u8 dst_ip[4][0x20]; +}; + +struct mlx5_ifc_fte_match_set_misc_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x10]; + u8 source_port[0x10]; + + u8 outer_second_prio[0x3]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0xc]; + u8 inner_second_prio[0x3]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0xc]; + + u8 outer_second_vlan_tag[0x1]; + u8 inner_second_vlan_tag[0x1]; + u8 reserved_2[0xe]; + u8 gre_protocol[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 vxlan_vni[0x18]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0xc]; + u8 outer_ipv6_flow_label[0x14]; + + u8 reserved_6[0xc]; + u8 inner_ipv6_flow_label[0x14]; + + u8 reserved_7[0xe0]; +}; + +struct mlx5_ifc_cmd_pas_bits { + u8 pa_h[0x20]; + + u8 pa_l[0x14]; + u8 reserved_0[0xc]; +}; + +struct mlx5_ifc_uint64_bits { + u8 hi[0x20]; + + u8 lo[0x20]; +}; + +enum { + MLX5_ADS_STAT_RATE_NO_LIMIT = 0x0, + MLX5_ADS_STAT_RATE_2_5GBPS = 0x7, + MLX5_ADS_STAT_RATE_10GBPS = 0x8, + MLX5_ADS_STAT_RATE_30GBPS = 0x9, + MLX5_ADS_STAT_RATE_5GBPS = 0xa, + MLX5_ADS_STAT_RATE_20GBPS = 0xb, + MLX5_ADS_STAT_RATE_40GBPS = 0xc, + MLX5_ADS_STAT_RATE_60GBPS = 0xd, + MLX5_ADS_STAT_RATE_80GBPS = 0xe, + MLX5_ADS_STAT_RATE_120GBPS = 0xf, +}; + +struct mlx5_ifc_ads_bits { + u8 fl[0x1]; + u8 free_ar[0x1]; + u8 reserved_0[0xe]; + u8 pkey_index[0x10]; + + u8 reserved_1[0x8]; + u8 grh[0x1]; + u8 mlid[0x7]; + u8 rlid[0x10]; + + u8 ack_timeout[0x5]; + u8 reserved_2[0x3]; + u8 src_addr_index[0x8]; + u8 reserved_3[0x4]; + u8 stat_rate[0x4]; + u8 hop_limit[0x8]; + + u8 reserved_4[0x4]; + u8 tclass[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; + + u8 reserved_5[0x4]; + u8 f_dscp[0x1]; + u8 f_ecn[0x1]; + u8 reserved_6[0x1]; + u8 f_eth_prio[0x1]; + u8 ecn[0x2]; + u8 dscp[0x6]; + u8 udp_sport[0x10]; + + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 sl[0x4]; + u8 port[0x8]; + u8 rmac_47_32[0x10]; + + u8 rmac_31_0[0x20]; +}; + +struct mlx5_ifc_flow_table_nic_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; + + u8 reserved_1[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; + + u8 reserved_2[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; + + u8 reserved_3[0x7200]; +}; + +struct mlx5_ifc_per_protocol_networking_offload_caps_bits { + u8 csum_cap[0x1]; + u8 vlan_cap[0x1]; + u8 lro_cap[0x1]; + u8 lro_psh_flag[0x1]; + u8 lro_time_stamp[0x1]; + u8 reserved_0[0x6]; + u8 max_lso_cap[0x5]; + u8 reserved_1[0x4]; + u8 rss_ind_tbl_cap[0x4]; + u8 reserved_2[0x3]; + u8 tunnel_lso_const_out_ip_id[0x1]; + u8 reserved_3[0x2]; + u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_vxlan[0x1]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x10]; + u8 lro_min_mss_size[0x10]; + + u8 reserved_6[0x120]; + + u8 lro_timer_supported_periods[4][0x20]; + + u8 reserved_7[0x600]; +}; + +struct mlx5_ifc_roce_cap_bits { + u8 roce_apm[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; + + u8 reserved_2[0xc]; + u8 l3_type[0x4]; + u8 reserved_3[0x8]; + u8 roce_version[0x8]; + + u8 reserved_4[0x10]; + u8 r_roce_dest_udp_port[0x10]; + + u8 r_roce_max_src_udp_port[0x10]; + u8 r_roce_min_src_udp_port[0x10]; + + u8 reserved_5[0x10]; + u8 roce_address_table_size[0x10]; + + u8 reserved_6[0x700]; +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_256_BYTES = 0x100, +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_1_BYTE = 0x1, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_256_BYTES = 0x100, +}; + +struct mlx5_ifc_atomic_caps_bits { + u8 reserved_0[0x40]; + + u8 atomic_req_endianness[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 atomic_operations[0x10]; + + u8 reserved_4[0x10]; + u8 atomic_size_qp[0x10]; + + u8 reserved_5[0x10]; + u8 atomic_size_dc[0x10]; + + u8 reserved_6[0x720]; +}; + +struct mlx5_ifc_odp_cap_bits { + u8 reserved_0[0x40]; + + u8 sig[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits uc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; + + u8 reserved_3[0x720]; +}; + +enum { + MLX5_WQ_TYPE_LINKED_LIST = 0x0, + MLX5_WQ_TYPE_CYCLIC = 0x1, + MLX5_WQ_TYPE_STRQ = 0x2, +}; + +enum { + MLX5_WQ_END_PAD_MODE_NONE = 0x0, + MLX5_WQ_END_PAD_MODE_ALIGN = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_8_GID_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_16_GID_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_32_GID_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_64_GID_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_128_GID_ENTRIES = 0x4, +}; + +enum { + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_128_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_256_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_512_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_1K_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_2K_ENTRIES = 0x4, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_4K_ENTRIES = 0x5, +}; + +enum { + MLX5_CMD_HCA_CAP_PORT_TYPE_IB = 0x0, + MLX5_CMD_HCA_CAP_PORT_TYPE_ETHERNET = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_DISABLED = 0x0, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_INITIAL_STATE = 0x1, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3, +}; + +enum { + MLX5_CAP_PORT_TYPE_IB = 0x0, + MLX5_CAP_PORT_TYPE_ETH = 0x1, }; struct mlx5_ifc_cmd_hca_cap_bits { @@ -148,9 +611,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_1[0xb]; u8 log_max_qp[0x5]; - u8 log_max_strq_sz[0x8]; - u8 reserved_2[0x3]; - u8 log_max_srqs[0x5]; + u8 reserved_2[0xb]; + u8 log_max_srq[0x5]; u8 reserved_3[0x10]; u8 reserved_4[0x8]; @@ -185,165 +647,6123 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_cap[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; - u8 reserved_15[0x1d]; + u8 reserved_15[0xd]; + u8 gid_table_size[0x10]; - u8 reserved_16[0x6]; + u8 out_of_seq_cnt[0x1]; + u8 vport_counters[0x1]; + u8 reserved_16[0x4]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; - u8 eswitch_owner[0x1]; - u8 reserved_17[0xa]; + u8 vport_group_manager[0x1]; + u8 vhca_group_manager[0x1]; + u8 ib_virt[0x1]; + u8 eth_virt[0x1]; + u8 reserved_17[0x1]; + u8 ets[0x1]; + u8 nic_flow_table[0x1]; + u8 reserved_18[0x4]; u8 local_ca_ack_delay[0x5]; - u8 reserved_18[0x8]; + u8 reserved_19[0x6]; + u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_19[0x3]; + u8 reserved_20[0x3]; u8 log_max_msg[0x5]; - u8 reserved_20[0x18]; + u8 reserved_21[0x18]; u8 stat_rate_support[0x10]; - u8 reserved_21[0x10]; + u8 reserved_22[0xc]; + u8 cqe_version[0x4]; - u8 reserved_22[0x10]; + u8 compact_address_vector[0x1]; + u8 reserved_23[0xe]; + u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_23[0x1]; + u8 reserved_24[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_24[0x1]; + u8 reserved_25[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; - u8 dc[0x1]; - u8 reserved_25[0x2]; + u8 dct[0x1]; + u8 reserved_26[0x1]; + u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 rsz_srq[0x1]; + u8 reserved_27[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 sniffer_rule_flow[0x1]; - u8 sniffer_rule_vport[0x1]; - u8 sniffer_rule_phy[0x1]; - u8 reserved_26[0x1]; + u8 reserved_28[0x3]; + u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_27[0x3]; + u8 reserved_29[0x1]; + u8 scqe_break_moderation[0x1]; + u8 reserved_30[0x1]; u8 cd[0x1]; - u8 reserved_28[0x1]; + u8 reserved_31[0x1]; u8 apm[0x1]; - u8 reserved_29[0x7]; + u8 reserved_32[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_30[0x4]; + u8 reserved_33[0x4]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_31[0xa]; + u8 reserved_34[0xa]; u8 uar_sz[0x6]; - u8 reserved_32[0x8]; + u8 reserved_35[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_33[0xa]; + u8 reserved_36[0x1]; + u8 pad_tx_eth_packet[0x1]; + u8 reserved_37[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_34[0x10]; + u8 reserved_38[0x10]; - u8 reserved_35[0x10]; + u8 reserved_39[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_36[0x10]; + u8 reserved_40[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_37[0x10]; + u8 reserved_41[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_38[0x7]; + u8 reserved_42[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_39[0x18]; + u8 reserved_43[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_40[0xb]; + u8 reserved_44[0x3]; + u8 log_max_transport_domain[0x5]; + u8 reserved_45[0x3]; u8 log_max_pd[0x5]; - u8 reserved_41[0xb]; + u8 reserved_46[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_42[0x20]; + u8 reserved_47[0x20]; - u8 reserved_43[0x3]; + u8 reserved_48[0x3]; u8 log_max_rq[0x5]; - u8 reserved_44[0x3]; + u8 reserved_49[0x3]; u8 log_max_sq[0x5]; - u8 reserved_45[0x3]; + u8 reserved_50[0x3]; u8 log_max_tir[0x5]; - u8 reserved_46[0x3]; + u8 reserved_51[0x3]; u8 log_max_tis[0x5]; - u8 reserved_47[0x13]; - u8 log_max_rq_per_tir[0x5]; - u8 reserved_48[0x3]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_52[0x2]; + u8 log_max_rmp[0x5]; + u8 reserved_53[0x3]; + u8 log_max_rqt[0x5]; + u8 reserved_54[0x3]; + u8 log_max_rqt_size[0x5]; + u8 reserved_55[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_49[0xe0]; + u8 reserved_56[0x3]; + u8 log_max_stride_sz_rq[0x5]; + u8 reserved_57[0x3]; + u8 log_min_stride_sz_rq[0x5]; + u8 reserved_58[0x3]; + u8 log_max_stride_sz_sq[0x5]; + u8 reserved_59[0x3]; + u8 log_min_stride_sz_sq[0x5]; + + u8 reserved_60[0x1b]; + u8 log_max_wq_sz[0x5]; + + u8 reserved_61[0xa0]; - u8 reserved_50[0x10]; + u8 reserved_62[0x3]; + u8 log_max_l2_table[0x5]; + u8 reserved_63[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_51[0x100]; + u8 reserved_64[0x100]; - u8 reserved_52[0x1f]; + u8 reserved_65[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_53[0x220]; + u8 reserved_66[0x220]; }; -struct mlx5_ifc_set_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_dest_format_struct_bits { + u8 destination_type[0x8]; + u8 destination_id[0x18]; - u8 reserved_2[0x40]; + u8 reserved_0[0x20]; +}; + +struct mlx5_ifc_fte_match_param_bits { + struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers; + + struct mlx5_ifc_fte_match_set_misc_bits misc_parameters; + + struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers; - struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct; + u8 reserved_0[0xa00]; }; -struct mlx5_ifc_query_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP = 0x0, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP = 0x1, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT = 0x2, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT = 0x3, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_IPSEC_SPI = 0x4, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_rx_hash_field_select_bits { + u8 l3_prot_type[0x1]; + u8 l4_prot_type[0x1]; + u8 selected_fields[0x1e]; +}; - u8 reserved_2[0x40]; +enum { + MLX5_WQ_WQ_TYPE_WQ_LINKED_LIST = 0x0, + MLX5_WQ_WQ_TYPE_WQ_CYCLIC = 0x1, }; -struct mlx5_ifc_query_hca_cap_out_bits { - u8 status[0x8]; +enum { + MLX5_WQ_END_PADDING_MODE_END_PAD_NONE = 0x0, + MLX5_WQ_END_PADDING_MODE_END_PAD_ALIGN = 0x1, +}; + +struct mlx5_ifc_wq_bits { + u8 wq_type[0x4]; + u8 wq_signature[0x1]; + u8 end_padding_mode[0x2]; + u8 cd_slave[0x1]; u8 reserved_0[0x18]; - u8 syndrome[0x20]; + u8 hds_skip_first_sge[0x1]; + u8 log2_hds_buf_size[0x3]; + u8 reserved_1[0x7]; + u8 page_offset[0x5]; + u8 lwm[0x10]; - u8 reserved_1[0x40]; + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x8]; + u8 uar_page[0x18]; + + u8 dbr_addr[0x40]; + + u8 hw_counter[0x20]; + + u8 sw_counter[0x20]; + + u8 reserved_4[0xc]; + u8 log_wq_stride[0x4]; + u8 reserved_5[0x3]; + u8 log_wq_pg_sz[0x5]; + u8 reserved_6[0x3]; + u8 log_wq_sz[0x5]; + + u8 reserved_7[0x4e0]; - u8 capability_struct[256][0x8]; + struct mlx5_ifc_cmd_pas_bits pas[0]; }; -struct mlx5_ifc_set_hca_cap_out_bits { - u8 status[0x8]; - u8 reserved_0[0x18]; +struct mlx5_ifc_rq_num_bits { + u8 reserved_0[0x8]; + u8 rq_num[0x18]; +}; - u8 syndrome[0x20]; +struct mlx5_ifc_mac_address_layout_bits { + u8 reserved_0[0x10]; + u8 mac_addr_47_32[0x10]; - u8 reserved_1[0x40]; + u8 mac_addr_31_0[0x20]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { + u8 reserved_0[0xa0]; + + u8 min_time_between_cnps[0x20]; + + u8 reserved_1[0x12]; + u8 cnp_dscp[0x6]; + u8 reserved_2[0x5]; + u8 cnp_802p_prio[0x3]; + + u8 reserved_3[0x720]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { + u8 reserved_0[0x60]; + + u8 reserved_1[0x4]; + u8 clamp_tgt_rate[0x1]; + u8 reserved_2[0x3]; + u8 clamp_tgt_rate_after_time_inc[0x1]; + u8 reserved_3[0x17]; + + u8 reserved_4[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_5[0xe0]; + + u8 rate_to_set_on_first_cnp[0x20]; + + u8 dce_tcp_g[0x20]; + + u8 dce_tcp_rtt[0x20]; + + u8 rate_reduce_monitor_period[0x20]; + + u8 reserved_6[0x20]; + + u8 initial_alpha_value[0x20]; + + u8 reserved_7[0x4a0]; +}; + +struct mlx5_ifc_cong_control_802_1qau_rp_bits { + u8 reserved_0[0x80]; + + u8 rppp_max_rps[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_1[0x640]; +}; + +enum { + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_CQ_SIZE = 0x1, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_PAGE_OFFSET = 0x2, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_PAGE_SIZE = 0x4, +}; + +struct mlx5_ifc_resize_field_select_bits { + u8 resize_field_select[0x20]; +}; + +enum { + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_OI = 0x4, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_C_EQN = 0x8, +}; + +struct mlx5_ifc_modify_field_select_bits { + u8 modify_field_select[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_np_bits { + u8 field_select_r_roce_np[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_rp_bits { + u8 field_select_r_roce_rp[0x20]; +}; + +enum { + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPPP_MAX_RPS = 0x4, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_TIME_RESET = 0x8, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_BYTE_RESET = 0x10, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_THRESHOLD = 0x20, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MAX_RATE = 0x40, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_AI_RATE = 0x80, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_HAI_RATE = 0x100, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_GD = 0x200, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_DEC_FAC = 0x400, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_RATE = 0x800, +}; + +struct mlx5_ifc_field_select_802_1qau_rp_bits { + u8 field_select_8021qaurp[0x20]; +}; + +struct mlx5_ifc_phys_layer_cntrs_bits { + u8 time_since_last_clear_high[0x20]; + + u8 time_since_last_clear_low[0x20]; + + u8 symbol_errors_high[0x20]; + + u8 symbol_errors_low[0x20]; + + u8 sync_headers_errors_high[0x20]; + + u8 sync_headers_errors_low[0x20]; + + u8 edpl_bip_errors_lane0_high[0x20]; + + u8 edpl_bip_errors_lane0_low[0x20]; + + u8 edpl_bip_errors_lane1_high[0x20]; + + u8 edpl_bip_errors_lane1_low[0x20]; + + u8 edpl_bip_errors_lane2_high[0x20]; + + u8 edpl_bip_errors_lane2_low[0x20]; + + u8 edpl_bip_errors_lane3_high[0x20]; + + u8 edpl_bip_errors_lane3_low[0x20]; + + u8 fc_fec_corrected_blocks_lane0_high[0x20]; + + u8 fc_fec_corrected_blocks_lane0_low[0x20]; + + u8 fc_fec_corrected_blocks_lane1_high[0x20]; + + u8 fc_fec_corrected_blocks_lane1_low[0x20]; + + u8 fc_fec_corrected_blocks_lane2_high[0x20]; + + u8 fc_fec_corrected_blocks_lane2_low[0x20]; + + u8 fc_fec_corrected_blocks_lane3_high[0x20]; + + u8 fc_fec_corrected_blocks_lane3_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_low[0x20]; + + u8 rs_fec_corrected_blocks_high[0x20]; + + u8 rs_fec_corrected_blocks_low[0x20]; + + u8 rs_fec_uncorrectable_blocks_high[0x20]; + + u8 rs_fec_uncorrectable_blocks_low[0x20]; + + u8 rs_fec_no_errors_blocks_high[0x20]; + + u8 rs_fec_no_errors_blocks_low[0x20]; + + u8 rs_fec_single_error_blocks_high[0x20]; + + u8 rs_fec_single_error_blocks_low[0x20]; + + u8 rs_fec_corrected_symbols_total_high[0x20]; + + u8 rs_fec_corrected_symbols_total_low[0x20]; + + u8 rs_fec_corrected_symbols_lane0_high[0x20]; + + u8 rs_fec_corrected_symbols_lane0_low[0x20]; + + u8 rs_fec_corrected_symbols_lane1_high[0x20]; + + u8 rs_fec_corrected_symbols_lane1_low[0x20]; + + u8 rs_fec_corrected_symbols_lane2_high[0x20]; + + u8 rs_fec_corrected_symbols_lane2_low[0x20]; + + u8 rs_fec_corrected_symbols_lane3_high[0x20]; + + u8 rs_fec_corrected_symbols_lane3_low[0x20]; + + u8 link_down_events[0x20]; + + u8 successful_recovery_events[0x20]; + + u8 reserved_0[0x180]; +}; + +struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { + u8 transmit_queue_high[0x20]; + + u8 transmit_queue_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { + u8 rx_octets_high[0x20]; + + u8 rx_octets_low[0x20]; + + u8 reserved_0[0xc0]; + + u8 rx_frames_high[0x20]; + + u8 rx_frames_low[0x20]; + + u8 tx_octets_high[0x20]; + + u8 tx_octets_low[0x20]; + + u8 reserved_1[0xc0]; + + u8 tx_frames_high[0x20]; + + u8 tx_frames_low[0x20]; + + u8 rx_pause_high[0x20]; + + u8 rx_pause_low[0x20]; + + u8 rx_pause_duration_high[0x20]; + + u8 rx_pause_duration_low[0x20]; + + u8 tx_pause_high[0x20]; + + u8 tx_pause_low[0x20]; + + u8 tx_pause_duration_high[0x20]; + + u8 tx_pause_duration_low[0x20]; + + u8 rx_pause_transition_high[0x20]; + + u8 rx_pause_transition_low[0x20]; + + u8 reserved_2[0x400]; +}; + +struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { + u8 port_transmit_wait_high[0x20]; + + u8 port_transmit_wait_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { + u8 dot3stats_alignment_errors_high[0x20]; + + u8 dot3stats_alignment_errors_low[0x20]; + + u8 dot3stats_fcs_errors_high[0x20]; + + u8 dot3stats_fcs_errors_low[0x20]; + + u8 dot3stats_single_collision_frames_high[0x20]; + + u8 dot3stats_single_collision_frames_low[0x20]; + + u8 dot3stats_multiple_collision_frames_high[0x20]; + + u8 dot3stats_multiple_collision_frames_low[0x20]; + + u8 dot3stats_sqe_test_errors_high[0x20]; + + u8 dot3stats_sqe_test_errors_low[0x20]; + + u8 dot3stats_deferred_transmissions_high[0x20]; + + u8 dot3stats_deferred_transmissions_low[0x20]; + + u8 dot3stats_late_collisions_high[0x20]; + + u8 dot3stats_late_collisions_low[0x20]; + + u8 dot3stats_excessive_collisions_high[0x20]; + + u8 dot3stats_excessive_collisions_low[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_high[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_low[0x20]; + + u8 dot3stats_carrier_sense_errors_high[0x20]; + + u8 dot3stats_carrier_sense_errors_low[0x20]; + + u8 dot3stats_frame_too_longs_high[0x20]; + + u8 dot3stats_frame_too_longs_low[0x20]; + + u8 dot3stats_internal_mac_receive_errors_high[0x20]; + + u8 dot3stats_internal_mac_receive_errors_low[0x20]; + + u8 dot3stats_symbol_errors_high[0x20]; + + u8 dot3stats_symbol_errors_low[0x20]; + + u8 dot3control_in_unknown_opcodes_high[0x20]; + + u8 dot3control_in_unknown_opcodes_low[0x20]; + + u8 dot3in_pause_frames_high[0x20]; + + u8 dot3in_pause_frames_low[0x20]; + + u8 dot3out_pause_frames_high[0x20]; + + u8 dot3out_pause_frames_low[0x20]; + + u8 reserved_0[0x3c0]; +}; + +struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { + u8 ether_stats_drop_events_high[0x20]; + + u8 ether_stats_drop_events_low[0x20]; + + u8 ether_stats_octets_high[0x20]; + + u8 ether_stats_octets_low[0x20]; + + u8 ether_stats_pkts_high[0x20]; + + u8 ether_stats_pkts_low[0x20]; + + u8 ether_stats_broadcast_pkts_high[0x20]; + + u8 ether_stats_broadcast_pkts_low[0x20]; + + u8 ether_stats_multicast_pkts_high[0x20]; + + u8 ether_stats_multicast_pkts_low[0x20]; + + u8 ether_stats_crc_align_errors_high[0x20]; + + u8 ether_stats_crc_align_errors_low[0x20]; + + u8 ether_stats_undersize_pkts_high[0x20]; + + u8 ether_stats_undersize_pkts_low[0x20]; + + u8 ether_stats_oversize_pkts_high[0x20]; + + u8 ether_stats_oversize_pkts_low[0x20]; + + u8 ether_stats_fragments_high[0x20]; + + u8 ether_stats_fragments_low[0x20]; + + u8 ether_stats_jabbers_high[0x20]; + + u8 ether_stats_jabbers_low[0x20]; + + u8 ether_stats_collisions_high[0x20]; + + u8 ether_stats_collisions_low[0x20]; + + u8 ether_stats_pkts64octets_high[0x20]; + + u8 ether_stats_pkts64octets_low[0x20]; + + u8 ether_stats_pkts65to127octets_high[0x20]; + + u8 ether_stats_pkts65to127octets_low[0x20]; + + u8 ether_stats_pkts128to255octets_high[0x20]; + + u8 ether_stats_pkts128to255octets_low[0x20]; + + u8 ether_stats_pkts256to511octets_high[0x20]; + + u8 ether_stats_pkts256to511octets_low[0x20]; + + u8 ether_stats_pkts512to1023octets_high[0x20]; + + u8 ether_stats_pkts512to1023octets_low[0x20]; + + u8 ether_stats_pkts1024to1518octets_high[0x20]; + + u8 ether_stats_pkts1024to1518octets_low[0x20]; + + u8 ether_stats_pkts1519to2047octets_high[0x20]; + + u8 ether_stats_pkts1519to2047octets_low[0x20]; + + u8 ether_stats_pkts2048to4095octets_high[0x20]; + + u8 ether_stats_pkts2048to4095octets_low[0x20]; + + u8 ether_stats_pkts4096to8191octets_high[0x20]; + + u8 ether_stats_pkts4096to8191octets_low[0x20]; + + u8 ether_stats_pkts8192to10239octets_high[0x20]; + + u8 ether_stats_pkts8192to10239octets_low[0x20]; + + u8 reserved_0[0x280]; +}; + +struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { + u8 if_in_octets_high[0x20]; + + u8 if_in_octets_low[0x20]; + + u8 if_in_ucast_pkts_high[0x20]; + + u8 if_in_ucast_pkts_low[0x20]; + + u8 if_in_discards_high[0x20]; + + u8 if_in_discards_low[0x20]; + + u8 if_in_errors_high[0x20]; + + u8 if_in_errors_low[0x20]; + + u8 if_in_unknown_protos_high[0x20]; + + u8 if_in_unknown_protos_low[0x20]; + + u8 if_out_octets_high[0x20]; + + u8 if_out_octets_low[0x20]; + + u8 if_out_ucast_pkts_high[0x20]; + + u8 if_out_ucast_pkts_low[0x20]; + + u8 if_out_discards_high[0x20]; + + u8 if_out_discards_low[0x20]; + + u8 if_out_errors_high[0x20]; + + u8 if_out_errors_low[0x20]; + + u8 if_in_multicast_pkts_high[0x20]; + + u8 if_in_multicast_pkts_low[0x20]; + + u8 if_in_broadcast_pkts_high[0x20]; + + u8 if_in_broadcast_pkts_low[0x20]; + + u8 if_out_multicast_pkts_high[0x20]; + + u8 if_out_multicast_pkts_low[0x20]; + + u8 if_out_broadcast_pkts_high[0x20]; + + u8 if_out_broadcast_pkts_low[0x20]; + + u8 reserved_0[0x480]; +}; + +struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { + u8 a_frames_transmitted_ok_high[0x20]; + + u8 a_frames_transmitted_ok_low[0x20]; + + u8 a_frames_received_ok_high[0x20]; + + u8 a_frames_received_ok_low[0x20]; + + u8 a_frame_check_sequence_errors_high[0x20]; + + u8 a_frame_check_sequence_errors_low[0x20]; + + u8 a_alignment_errors_high[0x20]; + + u8 a_alignment_errors_low[0x20]; + + u8 a_octets_transmitted_ok_high[0x20]; + + u8 a_octets_transmitted_ok_low[0x20]; + + u8 a_octets_received_ok_high[0x20]; + + u8 a_octets_received_ok_low[0x20]; + + u8 a_multicast_frames_xmitted_ok_high[0x20]; + + u8 a_multicast_frames_xmitted_ok_low[0x20]; + + u8 a_broadcast_frames_xmitted_ok_high[0x20]; + + u8 a_broadcast_frames_xmitted_ok_low[0x20]; + + u8 a_multicast_frames_received_ok_high[0x20]; + + u8 a_multicast_frames_received_ok_low[0x20]; + + u8 a_broadcast_frames_received_ok_high[0x20]; + + u8 a_broadcast_frames_received_ok_low[0x20]; + + u8 a_in_range_length_errors_high[0x20]; + + u8 a_in_range_length_errors_low[0x20]; + + u8 a_out_of_range_length_field_high[0x20]; + + u8 a_out_of_range_length_field_low[0x20]; + + u8 a_frame_too_long_errors_high[0x20]; + + u8 a_frame_too_long_errors_low[0x20]; + + u8 a_symbol_error_during_carrier_high[0x20]; + + u8 a_symbol_error_during_carrier_low[0x20]; + + u8 a_mac_control_frames_transmitted_high[0x20]; + + u8 a_mac_control_frames_transmitted_low[0x20]; + + u8 a_mac_control_frames_received_high[0x20]; + + u8 a_mac_control_frames_received_low[0x20]; + + u8 a_unsupported_opcodes_received_high[0x20]; + + u8 a_unsupported_opcodes_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_received_high[0x20]; + + u8 a_pause_mac_ctrl_frames_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_high[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_low[0x20]; + + u8 reserved_0[0x300]; +}; + +struct mlx5_ifc_cmd_inter_comp_event_bits { + u8 command_completion_vector[0x20]; + + u8 reserved_0[0xc0]; +}; + +struct mlx5_ifc_stall_vl_event_bits { + u8 reserved_0[0x18]; + u8 port_num[0x1]; + u8 reserved_1[0x3]; + u8 vl[0x4]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_db_bf_congestion_event_bits { + u8 event_subtype[0x8]; + u8 reserved_0[0x8]; + u8 congestion_level[0x8]; + u8 reserved_1[0x8]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_gpio_event_bits { + u8 reserved_0[0x60]; + + u8 gpio_event_hi[0x20]; + + u8 gpio_event_lo[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_port_state_change_event_bits { + u8 reserved_0[0x40]; + + u8 port_num[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_dropped_packet_logged_bits { + u8 reserved_0[0xe0]; +}; + +enum { + MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, + MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, +}; + +struct mlx5_ifc_cq_error_bits { + u8 reserved_0[0x8]; + u8 cqn[0x18]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 syndrome[0x8]; + + u8 reserved_3[0x80]; +}; + +struct mlx5_ifc_rdma_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 r_key[0x20]; + + u8 reserved_0[0x10]; + u8 packet_len[0x10]; + + u8 rdma_op_len[0x20]; + + u8 rdma_va[0x40]; + + u8 reserved_1[0x5]; + u8 rdma[0x1]; + u8 write[0x1]; + u8 requestor[0x1]; + u8 qp_number[0x18]; +}; + +struct mlx5_ifc_wqe_associated_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 reserved_0[0x10]; + u8 wqe_index[0x10]; + + u8 reserved_1[0x10]; + u8 len[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x5]; + u8 rdma[0x1]; + u8 write_read[0x1]; + u8 requestor[0x1]; + u8 qpn[0x18]; +}; + +struct mlx5_ifc_qp_events_bits { + u8 reserved_0[0xa0]; + + u8 type[0x8]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 qpn_rqn_sqn[0x18]; +}; + +struct mlx5_ifc_dct_events_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 dct_number[0x18]; +}; + +struct mlx5_ifc_comp_event_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 cq_number[0x18]; +}; + +enum { + MLX5_QPC_STATE_RST = 0x0, + MLX5_QPC_STATE_INIT = 0x1, + MLX5_QPC_STATE_RTR = 0x2, + MLX5_QPC_STATE_RTS = 0x3, + MLX5_QPC_STATE_SQER = 0x4, + MLX5_QPC_STATE_ERR = 0x6, + MLX5_QPC_STATE_SQD = 0x7, + MLX5_QPC_STATE_SUSPENDED = 0x9, +}; + +enum { + MLX5_QPC_ST_RC = 0x0, + MLX5_QPC_ST_UC = 0x1, + MLX5_QPC_ST_UD = 0x2, + MLX5_QPC_ST_XRC = 0x3, + MLX5_QPC_ST_DCI = 0x5, + MLX5_QPC_ST_QP0 = 0x7, + MLX5_QPC_ST_QP1 = 0x8, + MLX5_QPC_ST_RAW_DATAGRAM = 0x9, + MLX5_QPC_ST_REG_UMR = 0xc, +}; + +enum { + MLX5_QPC_PM_STATE_ARMED = 0x0, + MLX5_QPC_PM_STATE_REARM = 0x1, + MLX5_QPC_PM_STATE_RESERVED = 0x2, + MLX5_QPC_PM_STATE_MIGRATED = 0x3, +}; + +enum { + MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, + MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, +}; + +enum { + MLX5_QPC_MTU_256_BYTES = 0x1, + MLX5_QPC_MTU_512_BYTES = 0x2, + MLX5_QPC_MTU_1K_BYTES = 0x3, + MLX5_QPC_MTU_2K_BYTES = 0x4, + MLX5_QPC_MTU_4K_BYTES = 0x5, + MLX5_QPC_MTU_RAW_ETHERNET_QP = 0x7, +}; + +enum { + MLX5_QPC_ATOMIC_MODE_IB_SPEC = 0x1, + MLX5_QPC_ATOMIC_MODE_ONLY_8B = 0x2, + MLX5_QPC_ATOMIC_MODE_UP_TO_8B = 0x3, + MLX5_QPC_ATOMIC_MODE_UP_TO_16B = 0x4, + MLX5_QPC_ATOMIC_MODE_UP_TO_32B = 0x5, + MLX5_QPC_ATOMIC_MODE_UP_TO_64B = 0x6, + MLX5_QPC_ATOMIC_MODE_UP_TO_128B = 0x7, + MLX5_QPC_ATOMIC_MODE_UP_TO_256B = 0x8, +}; + +enum { + MLX5_QPC_CS_REQ_DISABLE = 0x0, + MLX5_QPC_CS_REQ_UP_TO_32B = 0x11, + MLX5_QPC_CS_REQ_UP_TO_64B = 0x22, +}; + +enum { + MLX5_QPC_CS_RES_DISABLE = 0x0, + MLX5_QPC_CS_RES_UP_TO_32B = 0x1, + MLX5_QPC_CS_RES_UP_TO_64B = 0x2, +}; + +struct mlx5_ifc_qpc_bits { + u8 state[0x4]; + u8 reserved_0[0x4]; + u8 st[0x8]; + u8 reserved_1[0x3]; + u8 pm_state[0x2]; + u8 reserved_2[0x7]; + u8 end_padding_mode[0x2]; + u8 reserved_3[0x2]; + + u8 wq_signature[0x1]; + u8 block_lb_mc[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 reserved_4[0x1]; + u8 drain_sigerr[0x1]; + u8 reserved_5[0x2]; + u8 pd[0x18]; + + u8 mtu[0x3]; + u8 log_msg_max[0x5]; + u8 reserved_6[0x1]; + u8 log_rq_size[0x4]; + u8 log_rq_stride[0x3]; + u8 no_sq[0x1]; + u8 log_sq_size[0x4]; + u8 reserved_7[0x6]; + u8 rlky[0x1]; + u8 reserved_8[0x4]; + + u8 counter_set_id[0x8]; + u8 uar_page[0x18]; + + u8 reserved_9[0x8]; + u8 user_index[0x18]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 remote_qpn[0x18]; + + struct mlx5_ifc_ads_bits primary_address_path; + + struct mlx5_ifc_ads_bits secondary_address_path; + + u8 log_ack_req_freq[0x4]; + u8 reserved_11[0x4]; + u8 log_sra_max[0x3]; + u8 reserved_12[0x2]; + u8 retry_count[0x3]; + u8 rnr_retry[0x3]; + u8 reserved_13[0x1]; + u8 fre[0x1]; + u8 cur_rnr_retry[0x3]; + u8 cur_retry_count[0x3]; + u8 reserved_14[0x5]; + + u8 reserved_15[0x20]; + + u8 reserved_16[0x8]; + u8 next_send_psn[0x18]; + + u8 reserved_17[0x8]; + u8 cqn_snd[0x18]; + + u8 reserved_18[0x40]; + + u8 reserved_19[0x8]; + u8 last_acked_psn[0x18]; + + u8 reserved_20[0x8]; + u8 ssn[0x18]; + + u8 reserved_21[0x8]; + u8 log_rra_max[0x3]; + u8 reserved_22[0x1]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 reserved_23[0x1]; + u8 page_offset[0x6]; + u8 reserved_24[0x3]; + u8 cd_slave_receive[0x1]; + u8 cd_slave_send[0x1]; + u8 cd_master[0x1]; + + u8 reserved_25[0x3]; + u8 min_rnr_nak[0x5]; + u8 next_rcv_psn[0x18]; + + u8 reserved_26[0x8]; + u8 xrcd[0x18]; + + u8 reserved_27[0x8]; + u8 cqn_rcv[0x18]; + + u8 dbr_addr[0x40]; + + u8 q_key[0x20]; + + u8 reserved_28[0x5]; + u8 rq_type[0x3]; + u8 srqn_rmpn[0x18]; + + u8 reserved_29[0x8]; + u8 rmsn[0x18]; + + u8 hw_sq_wqebb_counter[0x10]; + u8 sw_sq_wqebb_counter[0x10]; + + u8 hw_rq_counter[0x20]; + + u8 sw_rq_counter[0x20]; + + u8 reserved_30[0x20]; + + u8 reserved_31[0xf]; + u8 cgs[0x1]; + u8 cs_req[0x8]; + u8 cs_res[0x8]; + + u8 dc_access_key[0x40]; + + u8 reserved_32[0xc0]; +}; + +struct mlx5_ifc_roce_addr_layout_bits { + u8 source_l3_address[16][0x8]; + + u8 reserved_0[0x3]; + u8 vlan_valid[0x1]; + u8 vlan_id[0xc]; + u8 source_mac_47_32[0x10]; + + u8 source_mac_31_0[0x20]; + + u8 reserved_1[0x14]; + u8 roce_l3_type[0x4]; + u8 roce_version[0x8]; + + u8 reserved_2[0x20]; +}; + +union mlx5_ifc_hca_cap_union_bits { + struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; + struct mlx5_ifc_odp_cap_bits odp_cap; + struct mlx5_ifc_atomic_caps_bits atomic_caps; + struct mlx5_ifc_roce_cap_bits roce_cap; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; + struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + u8 reserved_0[0x8000]; +}; + +enum { + MLX5_FLOW_CONTEXT_ACTION_ALLOW = 0x1, + MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, +}; + +struct mlx5_ifc_flow_context_bits { + u8 reserved_0[0x20]; + + u8 group_id[0x20]; + + u8 reserved_1[0x8]; + u8 flow_tag[0x18]; + + u8 reserved_2[0x10]; + u8 action[0x10]; + + u8 reserved_3[0x8]; + u8 destination_list_size[0x18]; + + u8 reserved_4[0x160]; + + struct mlx5_ifc_fte_match_param_bits match_value; + + u8 reserved_5[0x600]; + + struct mlx5_ifc_dest_format_struct_bits destination[0]; +}; + +enum { + MLX5_XRC_SRQC_STATE_GOOD = 0x0, + MLX5_XRC_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_xrc_srqc_bits { + u8 state[0x4]; + u8 log_xrc_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_2[0x2]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; + + u8 user_index_equal_xrc_srqn[0x1]; + u8 reserved_4[0x1]; + u8 log_page_size[0x6]; + u8 user_index[0x18]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_7[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_8[0x2]; + + u8 reserved_9[0x80]; +}; + +struct mlx5_ifc_traffic_counter_bits { + u8 packets[0x40]; + + u8 octets[0x40]; +}; + +struct mlx5_ifc_tisc_bits { + u8 reserved_0[0xc]; + u8 prio[0x4]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x100]; + + u8 reserved_3[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_4[0x3c0]; +}; + +enum { + MLX5_TIRC_DISP_TYPE_DIRECT = 0x0, + MLX5_TIRC_DISP_TYPE_INDIRECT = 0x1, +}; + +enum { + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, +}; + +enum { + MLX5_TIRC_RX_HASH_FN_HASH_NONE = 0x0, + MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8 = 0x1, + MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ = 0x2, +}; + +enum { + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, +}; + +struct mlx5_ifc_tirc_bits { + u8 reserved_0[0x20]; + + u8 disp_type[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x40]; + + u8 reserved_3[0x4]; + u8 lro_timeout_period_usecs[0x10]; + u8 lro_enable_mask[0x4]; + u8 lro_max_ip_payload_size[0x8]; + + u8 reserved_4[0x40]; + + u8 reserved_5[0x8]; + u8 inline_rqn[0x18]; + + u8 rx_hash_symmetric[0x1]; + u8 reserved_6[0x1]; + u8 tunneled_offload_en[0x1]; + u8 reserved_7[0x5]; + u8 indirect_table[0x18]; + + u8 rx_hash_fn[0x4]; + u8 reserved_8[0x2]; + u8 self_lb_block[0x2]; + u8 transport_domain[0x18]; + + u8 rx_hash_toeplitz_key[10][0x20]; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_outer; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner; + + u8 reserved_9[0x4c0]; +}; + +enum { + MLX5_SRQC_STATE_GOOD = 0x0, + MLX5_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_srqc_bits { + u8 state[0x4]; + u8 log_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 reserved_2[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_3[0x2]; + u8 cqn[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x2]; + u8 log_page_size[0x6]; + u8 reserved_6[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_9[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_10[0x2]; + + u8 reserved_11[0x80]; +}; + +enum { + MLX5_SQC_STATE_RST = 0x0, + MLX5_SQC_STATE_RDY = 0x1, + MLX5_SQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_sqc_bits { + u8 rlky[0x1]; + u8 cd_master[0x1]; + u8 fre[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 reserved_4[0xa0]; + + u8 tis_lst_sz[0x10]; + u8 reserved_5[0x10]; + + u8 reserved_6[0x40]; + + u8 reserved_7[0x8]; + u8 tis_num_0[0x18]; + + struct mlx5_ifc_wq_bits wq; +}; + +struct mlx5_ifc_rqtc_bits { + u8 reserved_0[0xa0]; + + u8 reserved_1[0x10]; + u8 rqt_max_size[0x10]; + + u8 reserved_2[0x10]; + u8 rqt_actual_size[0x10]; + + u8 reserved_3[0x6a0]; + + struct mlx5_ifc_rq_num_bits rq_num[0]; +}; + +enum { + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_RMP = 0x1, +}; + +enum { + MLX5_RQC_STATE_RST = 0x0, + MLX5_RQC_STATE_RDY = 0x1, + MLX5_RQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rqc_bits { + u8 rlky[0x1]; + u8 reserved_0[0x2]; + u8 vsd[0x1]; + u8 mem_rq_type[0x4]; + u8 state[0x4]; + u8 reserved_1[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_2[0x12]; + + u8 reserved_3[0x8]; + u8 user_index[0x18]; + + u8 reserved_4[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 reserved_5[0x18]; + + u8 reserved_6[0x8]; + u8 rmpn[0x18]; + + u8 reserved_7[0xe0]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_RMPC_STATE_RDY = 0x1, + MLX5_RMPC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rmpc_bits { + u8 reserved_0[0x8]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x140]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, +}; + +struct mlx5_ifc_nic_vport_context_bits { + u8 reserved_0[0x1f]; + u8 roce_en[0x1]; + + u8 reserved_1[0x760]; + + u8 reserved_2[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_3[0xc]; + u8 allowed_list_size[0xc]; + + struct mlx5_ifc_mac_address_layout_bits permanent_address; + + u8 reserved_4[0x20]; + + u8 current_uc_mac_address[0][0x40]; +}; + +enum { + MLX5_MKC_ACCESS_MODE_PA = 0x0, + MLX5_MKC_ACCESS_MODE_MTT = 0x1, + MLX5_MKC_ACCESS_MODE_KLMS = 0x2, +}; + +struct mlx5_ifc_mkc_bits { + u8 reserved_0[0x1]; + u8 free[0x1]; + u8 reserved_1[0xd]; + u8 small_fence_on_rdma_read_response[0x1]; + u8 umr_en[0x1]; + u8 a[0x1]; + u8 rw[0x1]; + u8 rr[0x1]; + u8 lw[0x1]; + u8 lr[0x1]; + u8 access_mode[0x2]; + u8 reserved_2[0x8]; + + u8 qpn[0x18]; + u8 mkey_7_0[0x8]; + + u8 reserved_3[0x20]; + + u8 length64[0x1]; + u8 bsf_en[0x1]; + u8 sync_umr[0x1]; + u8 reserved_4[0x2]; + u8 expected_sigerr_count[0x1]; + u8 reserved_5[0x1]; + u8 en_rinval[0x1]; + u8 pd[0x18]; + + u8 start_addr[0x40]; + + u8 len[0x40]; + + u8 bsf_octword_size[0x20]; + + u8 reserved_6[0x80]; + + u8 translations_octword_size[0x20]; + + u8 reserved_7[0x1b]; + u8 log_page_size[0x5]; + + u8 reserved_8[0x20]; +}; + +struct mlx5_ifc_pkey_bits { + u8 reserved_0[0x10]; + u8 pkey[0x10]; +}; + +struct mlx5_ifc_array128_auto_bits { + u8 array128_auto[16][0x8]; +}; + +struct mlx5_ifc_hca_vport_context_bits { + u8 field_select[0x20]; + + u8 reserved_0[0xe0]; + + u8 sm_virt_aware[0x1]; + u8 has_smi[0x1]; + u8 has_raw[0x1]; + u8 grh_required[0x1]; + u8 reserved_1[0x10]; + u8 port_state_policy[0x4]; + u8 phy_port_state[0x4]; + u8 vport_state[0x4]; + + u8 reserved_2[0x60]; + + u8 port_guid[0x40]; + + u8 node_guid[0x40]; + + u8 cap_mask1[0x20]; + + u8 cap_mask1_field_select[0x20]; + + u8 cap_mask2[0x20]; + + u8 cap_mask2_field_select[0x20]; + + u8 reserved_3[0x80]; + + u8 lid[0x10]; + u8 reserved_4[0x4]; + u8 init_type_reply[0x4]; + u8 lmc[0x3]; + u8 subnet_timeout[0x5]; + + u8 sm_lid[0x10]; + u8 sm_sl[0x4]; + u8 reserved_5[0xc]; + + u8 qkey_violation_counter[0x10]; + u8 pkey_violation_counter[0x10]; + + u8 reserved_6[0xca0]; +}; + +enum { + MLX5_EQC_STATUS_OK = 0x0, + MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, +}; + +enum { + MLX5_EQC_ST_ARMED = 0x9, + MLX5_EQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_eqc_bits { + u8 status[0x4]; + u8 reserved_0[0x9]; + u8 ec[0x1]; + u8 oi[0x1]; + u8 reserved_1[0x5]; + u8 st[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x14]; + u8 page_offset[0x6]; + u8 reserved_5[0x6]; + + u8 reserved_6[0x3]; + u8 log_eq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x18]; + u8 intr[0x8]; + + u8 reserved_9[0x3]; + u8 log_page_size[0x5]; + u8 reserved_10[0x18]; + + u8 reserved_11[0x60]; + + u8 reserved_12[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_13[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_14[0x80]; +}; + +enum { + MLX5_DCTC_STATE_ACTIVE = 0x0, + MLX5_DCTC_STATE_DRAINING = 0x1, + MLX5_DCTC_STATE_DRAINED = 0x2, +}; + +enum { + MLX5_DCTC_CS_RES_DISABLE = 0x0, + MLX5_DCTC_CS_RES_NA = 0x1, + MLX5_DCTC_CS_RES_UP_TO_64B = 0x2, +}; + +enum { + MLX5_DCTC_MTU_256_BYTES = 0x1, + MLX5_DCTC_MTU_512_BYTES = 0x2, + MLX5_DCTC_MTU_1K_BYTES = 0x3, + MLX5_DCTC_MTU_2K_BYTES = 0x4, + MLX5_DCTC_MTU_4K_BYTES = 0x5, +}; + +struct mlx5_ifc_dctc_bits { + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 rlky[0x1]; + u8 free_ar[0x1]; + u8 reserved_4[0xd]; + + u8 reserved_5[0x8]; + u8 cs_res[0x8]; + u8 reserved_6[0x3]; + u8 min_rnr_nak[0x5]; + u8 reserved_7[0x8]; + + u8 reserved_8[0x8]; + u8 srqn[0x18]; + + u8 reserved_9[0x8]; + u8 pd[0x18]; + + u8 tclass[0x8]; + u8 reserved_10[0x4]; + u8 flow_label[0x14]; + + u8 dc_access_key[0x40]; + + u8 reserved_11[0x5]; + u8 mtu[0x3]; + u8 port[0x8]; + u8 pkey_index[0x10]; + + u8 reserved_12[0x8]; + u8 my_addr_index[0x8]; + u8 reserved_13[0x8]; + u8 hop_limit[0x8]; + + u8 dc_access_key_violation_count[0x20]; + + u8 reserved_14[0x14]; + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 ecn[0x2]; + u8 dscp[0x6]; + + u8 reserved_15[0x40]; +}; + +enum { + MLX5_CQC_STATUS_OK = 0x0, + MLX5_CQC_STATUS_CQ_OVERFLOW = 0x9, + MLX5_CQC_STATUS_CQ_WRITE_FAIL = 0xa, +}; + +enum { + MLX5_CQC_CQE_SZ_64_BYTES = 0x0, + MLX5_CQC_CQE_SZ_128_BYTES = 0x1, +}; + +enum { + MLX5_CQC_ST_SOLICITED_NOTIFICATION_REQUEST_ARMED = 0x6, + MLX5_CQC_ST_NOTIFICATION_REQUEST_ARMED = 0x9, + MLX5_CQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_cqc_bits { + u8 status[0x4]; + u8 reserved_0[0x4]; + u8 cqe_sz[0x3]; + u8 cc[0x1]; + u8 reserved_1[0x1]; + u8 scqe_break_moderation_en[0x1]; + u8 oi[0x1]; + u8 reserved_2[0x2]; + u8 cqe_zip_en[0x1]; + u8 mini_cqe_res_format[0x2]; + u8 st[0x4]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x14]; + u8 page_offset[0x6]; + u8 reserved_6[0x6]; + + u8 reserved_7[0x3]; + u8 log_cq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_8[0x4]; + u8 cq_period[0xc]; + u8 cq_max_count[0x10]; + + u8 reserved_9[0x18]; + u8 c_eqn[0x8]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 reserved_11[0x18]; + + u8 reserved_12[0x20]; + + u8 reserved_13[0x8]; + u8 last_notified_index[0x18]; + + u8 reserved_14[0x8]; + u8 last_solicit_index[0x18]; + + u8 reserved_15[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_16[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_17[0x40]; + + u8 dbr_addr[0x40]; +}; + +union mlx5_ifc_cong_control_roce_ecn_auto_bits { + struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; + u8 reserved_0[0x800]; +}; + +struct mlx5_ifc_query_adapter_param_block_bits { + u8 reserved_0[0xe0]; + + u8 reserved_1[0x10]; + u8 vsd_vendor_id[0x10]; + + u8 vsd[208][0x8]; + + u8 vsd_contd_psid[16][0x8]; +}; + +union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { + struct mlx5_ifc_modify_field_select_bits modify_field_select; + struct mlx5_ifc_resize_field_select_bits resize_field_select; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_field_select_802_1_r_roce_auto_bits { + struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp; + struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp; + struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + u8 reserved_0[0x7c0]; +}; + +union mlx5_ifc_event_auto_bits { + struct mlx5_ifc_comp_event_bits comp_event; + struct mlx5_ifc_dct_events_bits dct_events; + struct mlx5_ifc_qp_events_bits qp_events; + struct mlx5_ifc_wqe_associated_page_fault_event_bits wqe_associated_page_fault_event; + struct mlx5_ifc_rdma_page_fault_event_bits rdma_page_fault_event; + struct mlx5_ifc_cq_error_bits cq_error; + struct mlx5_ifc_dropped_packet_logged_bits dropped_packet_logged; + struct mlx5_ifc_port_state_change_event_bits port_state_change_event; + struct mlx5_ifc_gpio_event_bits gpio_event; + struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event; + struct mlx5_ifc_stall_vl_event_bits stall_vl_event; + struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event; + u8 reserved_0[0xe0]; +}; + +struct mlx5_ifc_health_buffer_bits { + u8 reserved_0[0x100]; + + u8 assert_existptr[0x20]; + + u8 assert_callra[0x20]; + + u8 reserved_1[0x40]; + + u8 fw_version[0x20]; + + u8 hw_id[0x20]; + + u8 reserved_2[0x20]; + + u8 irisc_index[0x8]; + u8 synd[0x8]; + u8 ext_synd[0x10]; +}; + +struct mlx5_ifc_register_loopback_control_bits { + u8 no_lb[0x1]; + u8 reserved_0[0x7]; + u8 port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_teardown_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, + MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE = 0x1, +}; + +struct mlx5_ifc_teardown_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 profile[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_sqerr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqerr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_sqd2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqd2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_set_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_set_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_PASS_ALL = 0x0, + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_SELECTIVE = 0x2, +}; + +struct mlx5_ifc_set_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x6]; + u8 demux_mode[0x2]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_set_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_6[0xc0]; +}; + +struct mlx5_ifc_set_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 current_issi[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_set_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_set_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_rts2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rts2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rtr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rtr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rst2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rst2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_query_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_OUT_STATE_DOWN = 0x0, + MLX5_QUERY_VPORT_STATE_OUT_STATE_UP = 0x1, +}; + +struct mlx5_ifc_query_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 admin_state[0x4]; + u8 state[0x4]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, +}; + +struct mlx5_ifc_query_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_vport_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_traffic_counter_bits received_errors; + + struct mlx5_ifc_traffic_counter_bits transmit_errors; + + struct mlx5_ifc_traffic_counter_bits received_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits received_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits received_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast; + + u8 reserved_2[0xa00]; +}; + +enum { + MLX5_QUERY_VPORT_COUNTER_IN_OP_MOD_VPORT_COUNTERS = 0x0, +}; + +struct mlx5_ifc_query_vport_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x60]; + + u8 clear[0x1]; + u8 reserved_4[0x1f]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_query_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_tisc_bits tis_context; +}; + +struct mlx5_ifc_query_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_tirc_bits tir_context; +}; + +struct mlx5_ifc_query_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_sqc_bits sq_context; +}; + +struct mlx5_ifc_query_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 resd_lkey[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_query_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqc_bits rq_context; +}; + +struct mlx5_ifc_query_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_query_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rmpc_bits rmp_context; +}; + +struct mlx5_ifc_query_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_3[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 rx_write_requests[0x20]; + + u8 reserved_2[0x20]; + + u8 rx_read_requests[0x20]; + + u8 reserved_3[0x20]; + + u8 rx_atomic_requests[0x20]; + + u8 reserved_4[0x20]; + + u8 rx_dct_connect[0x20]; + + u8 reserved_5[0x20]; + + u8 out_of_buffer[0x20]; + + u8 reserved_6[0x20]; + + u8 out_of_sequence[0x20]; + + u8 reserved_7[0x620]; +}; + +struct mlx5_ifc_query_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x80]; + + u8 clear[0x1]; + u8 reserved_3[0x1f]; + + u8 reserved_4[0x18]; + u8 counter_set_id[0x8]; +}; + +struct mlx5_ifc_query_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; +}; + +enum { + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES = 0x1, + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES = 0x2, + MLX5_QUERY_PAGES_IN_OP_MOD_REGULAR_PAGES = 0x3, +}; + +struct mlx5_ifc_query_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_query_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_query_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_2[0x600]; + + u8 bsf0_klm0_pas_mtt0_1[16][0x8]; + + u8 bsf1_klm1_pas_mtt2_3[16][0x8]; +}; + +struct mlx5_ifc_query_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; +}; + +struct mlx5_ifc_query_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 mad_dumux_parameters_block[0x20]; +}; + +struct mlx5_ifc_query_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 reserved_2[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_3[0xc0]; +}; + +struct mlx5_ifc_query_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_query_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 current_issi[0x10]; + + u8 reserved_2[0xa0]; + + u8 supported_issi_reserved[76][0x8]; + u8 supported_issi_dw0[0x20]; +}; + +struct mlx5_ifc_query_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_pkey_bits pkey[0]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 pkey_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_gid_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 gids_num[0x10]; + u8 reserved_2[0x10]; + + struct mlx5_ifc_array128_auto_bits gid[0]; +}; + +struct mlx5_ifc_query_hca_vport_gid_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 gid_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_query_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_query_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x80]; + + u8 reserved_2[0x8]; + u8 level[0x8]; + u8 reserved_3[0x8]; + u8 log_size[0x8]; + + u8 reserved_4[0x120]; +}; + +struct mlx5_ifc_query_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_query_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x1c0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_query_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +enum { + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_query_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 start_flow_index[0x20]; + + u8 reserved_2[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_3[0xa0]; + + u8 reserved_4[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_5[0xe00]; +}; + +struct mlx5_ifc_query_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_query_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_2[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_3[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_2[0x180]; +}; + +struct mlx5_ifc_query_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_2[0x1e]; +}; + +struct mlx5_ifc_query_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_statistics_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 cur_flows[0x20]; + + u8 sum_flows[0x20]; + + u8 cnp_ignored_high[0x20]; + + u8 cnp_ignored_low[0x20]; + + u8 cnp_handled_high[0x20]; + + u8 cnp_handled_low[0x20]; + + u8 reserved_2[0x100]; + + u8 time_stamp_high[0x20]; + + u8 time_stamp_low[0x20]; + + u8 accumulators_period[0x20]; + + u8 ecn_marked_roce_packets_high[0x20]; + + u8 ecn_marked_roce_packets_low[0x20]; + + u8 cnps_sent_high[0x20]; + + u8 cnps_sent_low[0x20]; + + u8 reserved_3[0x560]; +}; + +struct mlx5_ifc_query_cong_statistics_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 clear[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_query_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_adapter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct; +}; + +struct mlx5_ifc_query_adapter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_qp_2rst_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2rst_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_qp_2err_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2err_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_page_fault_resume_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_page_fault_resume_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 error[0x1]; + u8 reserved_2[0x4]; + u8 rdma[0x1]; + u8 read_write[0x1]; + u8 req_res[0x1]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_nop_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_nop_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x18]; + u8 admin_state[0x4]; + u8 reserved_4[0x4]; +}; + +struct mlx5_ifc_modify_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_modify_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_modify_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 sq_state[0x4]; + u8 reserved_2[0x4]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_modify_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqtc_bits ctx; +}; + +struct mlx5_ifc_modify_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rq_state[0x4]; + u8 reserved_2[0x4]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_modify_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rmp_state[0x4]; + u8 reserved_2[0x4]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_modify_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_nic_vport_field_select_bits { + u8 reserved_0[0x1c]; + u8 permanent_address[0x1]; + u8 addresses_list[0x1]; + u8 roce_en[0x1]; + u8 reserved_1[0x1]; +}; + +struct mlx5_ifc_modify_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; + + u8 reserved_3[0x780]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_modify_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_modify_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_MODIFY_CQ_IN_OP_MOD_MODIFY_CQ = 0x0, + MLX5_MODIFY_CQ_IN_OP_MOD_RESIZE_CQ = 0x1, +}; + +struct mlx5_ifc_modify_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_modify_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_3[0x1e]; +}; + +struct mlx5_ifc_modify_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select; + + u8 reserved_3[0x80]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_manage_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 output_num_entries[0x20]; + + u8 reserved_1[0x20]; + + u8 pas[0][0x40]; +}; + +enum { + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_FAIL = 0x0, + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_SUCCESS = 0x1, + MLX5_MANAGE_PAGES_IN_OP_MOD_HCA_RETURN_PAGES = 0x2, +}; + +struct mlx5_ifc_manage_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 input_num_entries[0x20]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_mad_ifc_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 response_mad_packet[256][0x8]; +}; + +struct mlx5_ifc_mad_ifc_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 remote_lid[0x10]; + u8 reserved_2[0x8]; + u8 port[0x8]; + + u8 reserved_3[0x20]; + + u8 mad[256][0x8]; +}; + +struct mlx5_ifc_init_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_init2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_get_dropped_packet_log_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 packet_headers_log[128][0x8]; + + u8 packet_syndrome[64][0x8]; +}; + +struct mlx5_ifc_get_dropped_packet_log_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_gen_eqe_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; + + u8 eqe[64][0x8]; +}; + +struct mlx5_ifc_gen_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_enable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_enable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_drain_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_drain_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_disable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_disable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_detach_from_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_detach_from_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_destroy_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 psvn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_destroy_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_destroy_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_delete_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_delete_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +struct mlx5_ifc_dealloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrcd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 uar[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tisn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_create_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tirn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_create_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 sqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_create_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqtn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_create_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_create_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rmpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_create_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 qpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_4[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 reserved_2[0x8]; + u8 psv0_index[0x18]; + + u8 reserved_3[0x8]; + u8 psv1_index[0x18]; + + u8 reserved_4[0x8]; + u8 psv2_index[0x18]; + + u8 reserved_5[0x8]; + u8 psv3_index[0x18]; +}; + +struct mlx5_ifc_create_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 num_psv[0x4]; + u8 reserved_2[0x4]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_4[0x80]; + + u8 translations_octword_actual_size[0x20]; + + u8 reserved_5[0x560]; + + u8 klm_pas_mtt[0][0x20]; +}; + +struct mlx5_ifc_create_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 table_id[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x8]; + u8 level[0x8]; + u8 reserved_6[0x8]; + u8 log_size[0x8]; + + u8 reserved_7[0x120]; +}; + +struct mlx5_ifc_create_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 group_id[0x18]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_create_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x20]; + + u8 start_flow_index[0x20]; + + u8 reserved_6[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_7[0xa0]; + + u8 reserved_8[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_9[0xe00]; +}; + +struct mlx5_ifc_create_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 eq_number[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_3[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_4[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 dctn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_3[0x180]; +}; + +struct mlx5_ifc_create_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 cqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_config_int_moderation_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_WRITE = 0x0, + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_config_int_moderation_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_attach_to_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_attach_to_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_arm_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ = 0x1, +}; + +struct mlx5_ifc_arm_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, +}; + +struct mlx5_ifc_arm_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srq_number[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_arm_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dct_number[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrcd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 uar[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 pd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_access_register_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 register_data[0][0x20]; +}; + +enum { + MLX5_ACCESS_REGISTER_IN_OP_MOD_WRITE = 0x0, + MLX5_ACCESS_REGISTER_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_access_register_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 register_id[0x10]; + + u8 argument[0x20]; + + u8 register_data[0][0x20]; +}; + +struct mlx5_ifc_sltp_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x7]; + u8 polarity[0x1]; + u8 ob_tap0[0x8]; + u8 ob_tap1[0x8]; + u8 ob_tap2[0x8]; + + u8 reserved_4[0xc]; + u8 ob_preemp_mode[0x4]; + u8 ob_reg[0x8]; + u8 ob_bias[0x8]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_slrg_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 time_to_link_up[0x10]; + u8 reserved_2[0xc]; + u8 grade_lane_speed[0x4]; + + u8 grade_version[0x8]; + u8 grade[0x18]; + + u8 reserved_3[0x4]; + u8 height_grade_type[0x4]; + u8 height_grade[0x18]; + + u8 height_dz[0x10]; + u8 height_dv[0x10]; + + u8 reserved_4[0x10]; + u8 height_sigma[0x10]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x4]; + u8 phase_grade_type[0x4]; + u8 phase_grade[0x18]; + + u8 reserved_7[0x8]; + u8 phase_eo_pos[0x8]; + u8 reserved_8[0x8]; + u8 phase_eo_neg[0x8]; + + u8 ffe_set_tested[0x10]; + u8 test_errors_per_lane[0x10]; +}; + +struct mlx5_ifc_pvlc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x1c]; + u8 vl_hw_cap[0x4]; + + u8 reserved_3[0x1c]; + u8 vl_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 vl_operational[0x4]; +}; + +struct mlx5_ifc_pude_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_ptys_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 proto_mask[0x3]; + + u8 reserved_2[0x40]; + + u8 eth_proto_capability[0x20]; + + u8 ib_link_width_capability[0x10]; + u8 ib_proto_capability[0x10]; + + u8 reserved_3[0x20]; + + u8 eth_proto_admin[0x20]; + + u8 ib_link_width_admin[0x10]; + u8 ib_proto_admin[0x10]; + + u8 reserved_4[0x20]; + + u8 eth_proto_oper[0x20]; + + u8 ib_link_width_oper[0x10]; + u8 ib_proto_oper[0x10]; + + u8 reserved_5[0x20]; + + u8 eth_proto_lp_advertise[0x20]; + + u8 reserved_6[0x60]; +}; + +struct mlx5_ifc_ptas_reg_bits { + u8 reserved_0[0x20]; + + u8 algorithm_options[0x10]; + u8 reserved_1[0x4]; + u8 repetitions_mode[0x4]; + u8 num_of_repetitions[0x8]; + + u8 grade_version[0x8]; + u8 height_grade_type[0x4]; + u8 phase_grade_type[0x4]; + u8 height_grade_weight[0x8]; + u8 phase_grade_weight[0x8]; + + u8 gisim_measure_bits[0x10]; + u8 adaptive_tap_measure_bits[0x10]; + + u8 ber_bath_high_error_threshold[0x10]; + u8 ber_bath_mid_error_threshold[0x10]; + + u8 ber_bath_low_error_threshold[0x10]; + u8 one_ratio_high_threshold[0x10]; + + u8 one_ratio_high_mid_threshold[0x10]; + u8 one_ratio_low_mid_threshold[0x10]; + + u8 one_ratio_low_threshold[0x10]; + u8 ndeo_error_threshold[0x10]; + + u8 mixer_offset_step_size[0x10]; + u8 reserved_2[0x8]; + u8 mix90_phase_for_voltage_bath[0x8]; + + u8 mixer_offset_start[0x10]; + u8 mixer_offset_end[0x10]; + + u8 reserved_3[0x15]; + u8 ber_test_time[0xb]; +}; + +struct mlx5_ifc_pspa_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 sub_port[0x8]; + u8 reserved_0[0x8]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_pqdr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x5]; + u8 prio[0x3]; + u8 reserved_2[0x6]; + u8 mode[0x2]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x10]; + u8 min_threshold[0x10]; + + u8 reserved_5[0x10]; + u8 max_threshold[0x10]; + + u8 reserved_6[0x10]; + u8 mark_probability_denominator[0x10]; + + u8 reserved_7[0x60]; +}; + +struct mlx5_ifc_ppsc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x1c]; + u8 wrps_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 wrps_status[0x4]; + + u8 reserved_5[0x8]; + u8 up_threshold[0x8]; + u8 reserved_6[0x8]; + u8 down_threshold[0x8]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x1c]; + u8 srps_admin[0x4]; + + u8 reserved_9[0x1c]; + u8 srps_status[0x4]; + + u8 reserved_10[0x40]; +}; + +struct mlx5_ifc_pplr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x8]; + u8 lb_cap[0x8]; + u8 reserved_3[0x8]; + u8 lb_en[0x8]; +}; + +struct mlx5_ifc_pplm_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x20]; + + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_3[0x8]; + + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; + + u8 reserved_4[0x20]; +}; + +struct mlx5_ifc_ppcnt_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x8]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_1[0x1c]; + u8 prio_tc[0x3]; + + union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; +}; + +struct mlx5_ifc_ppad_reg_bits { + u8 reserved_0[0x3]; + u8 single_mac[0x1]; + u8 reserved_1[0x4]; + u8 local_port[0x8]; + u8 mac_47_32[0x10]; + + u8 mac_31_0[0x20]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_pmtu_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 max_mtu[0x10]; + u8 reserved_2[0x10]; + + u8 admin_mtu[0x10]; + u8 reserved_3[0x10]; + + u8 oper_mtu[0x10]; + u8 reserved_4[0x10]; +}; + +struct mlx5_ifc_pmpr_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x18]; + u8 attenuation_5g[0x8]; + + u8 reserved_3[0x18]; + u8 attenuation_7g[0x8]; + + u8 reserved_4[0x18]; + u8 attenuation_12g[0x8]; +}; + +struct mlx5_ifc_pmpe_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0xc]; + u8 module_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_pmpc_reg_bits { + u8 module_state_updated[32][0x8]; +}; + +struct mlx5_ifc_pmlpn_reg_bits { + u8 reserved_0[0x4]; + u8 mlpn_status[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 e[0x1]; + u8 reserved_2[0x1f]; +}; + +struct mlx5_ifc_pmlp_reg_bits { + u8 rxtx[0x1]; + u8 reserved_0[0x7]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 width[0x8]; + + u8 lane0_module_mapping[0x20]; + + u8 lane1_module_mapping[0x20]; + + u8 lane2_module_mapping[0x20]; + + u8 lane3_module_mapping[0x20]; + + u8 reserved_2[0x160]; +}; + +struct mlx5_ifc_pmaos_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x4]; + u8 admin_status[0x4]; + u8 reserved_2[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_3[0x1c]; + u8 e[0x2]; + + u8 reserved_4[0x40]; +}; + +struct mlx5_ifc_plpc_reg_bits { + u8 reserved_0[0x4]; + u8 profile_id[0xc]; + u8 reserved_1[0x4]; + u8 proto_mask[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x10]; + u8 lane_speed[0x10]; + + u8 reserved_4[0x17]; + u8 lpbf[0x1]; + u8 fec_mode_policy[0x8]; + + u8 retransmission_capability[0x8]; + u8 fec_mode_capability[0x18]; + + u8 retransmission_support_admin[0x8]; + u8 fec_mode_support_admin[0x18]; + + u8 retransmission_request_admin[0x8]; + u8 fec_mode_request_admin[0x18]; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_plib_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 ib_port[0x8]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_plbf_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 lbf_mode[0x3]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_pipg_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 dic[0x1]; + u8 reserved_2[0x19]; + u8 ipg[0x4]; + u8 reserved_3[0x2]; +}; + +struct mlx5_ifc_pifr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xe0]; + + u8 port_filter[8][0x20]; + + u8 port_filter_update_en[8][0x20]; +}; + +struct mlx5_ifc_pfcc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 ppan[0x4]; + u8 reserved_2[0x4]; + u8 prio_mask_tx[0x8]; + u8 reserved_3[0x8]; + u8 prio_mask_rx[0x8]; + + u8 pptx[0x1]; + u8 aptx[0x1]; + u8 reserved_4[0x6]; + u8 pfctx[0x8]; + u8 reserved_5[0x10]; + + u8 pprx[0x1]; + u8 aprx[0x1]; + u8 reserved_6[0x6]; + u8 pfcrx[0x8]; + u8 reserved_7[0x10]; + + u8 reserved_8[0x80]; +}; + +struct mlx5_ifc_pelc_reg_bits { + u8 op[0x4]; + u8 reserved_0[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 op_admin[0x8]; + u8 op_capability[0x8]; + u8 op_request[0x8]; + u8 op_active[0x8]; + + u8 admin[0x40]; + + u8 capability[0x40]; + + u8 request[0x40]; + + u8 active[0x40]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_peir_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xc]; + u8 error_count[0x4]; + u8 reserved_3[0x10]; + + u8 reserved_4[0xc]; + u8 lane[0x4]; + u8 reserved_5[0x8]; + u8 error_type[0x8]; +}; + +struct mlx5_ifc_pcap_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 port_capability_mask[4][0x20]; +}; + +struct mlx5_ifc_paos_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_2[0x1c]; + u8 e[0x2]; + + u8 reserved_3[0x40]; +}; + +struct mlx5_ifc_pamp_reg_bits { + u8 reserved_0[0x8]; + u8 opamp_group[0x8]; + u8 reserved_1[0xc]; + u8 opamp_group_type[0x4]; + + u8 start_index[0x10]; + u8 reserved_2[0x4]; + u8 num_of_indices[0xc]; + + u8 index_data[18][0x10]; +}; + +struct mlx5_ifc_lane_2_module_mapping_bits { + u8 reserved_0[0x6]; + u8 rx_lane[0x2]; + u8 reserved_1[0x6]; + u8 tx_lane[0x2]; + u8 reserved_2[0x8]; + u8 module[0x8]; +}; + +struct mlx5_ifc_bufferx_reg_bits { + u8 reserved_0[0x6]; + u8 lossy[0x1]; + u8 epsb[0x1]; + u8 reserved_1[0xc]; + u8 size[0xc]; + + u8 xoff_threshold[0x10]; + u8 xon_threshold[0x10]; +}; + +struct mlx5_ifc_set_node_in_bits { + u8 node_description[64][0x8]; +}; + +struct mlx5_ifc_register_power_settings_bits { + u8 reserved_0[0x18]; + u8 power_settings_level[0x8]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_register_host_endianness_bits { + u8 he[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_umr_pointer_desc_argument_bits { + u8 reserved_0[0x20]; + + u8 mkey[0x20]; + + u8 addressh_63_32[0x20]; + + u8 addressl_31_0[0x20]; +}; + +struct mlx5_ifc_ud_adrs_vector_bits { + u8 dc_key[0x40]; + + u8 ext[0x1]; + u8 reserved_0[0x7]; + u8 destination_qp_dct[0x18]; + + u8 static_rate[0x4]; + u8 sl_eth_prio[0x4]; + u8 fl[0x1]; + u8 mlid[0x7]; + u8 rlid_udp_sport[0x10]; + + u8 reserved_1[0x20]; + + u8 rmac_47_16[0x20]; + + u8 rmac_15_0[0x10]; + u8 tclass[0x8]; + u8 hop_limit[0x8]; + + u8 reserved_2[0x1]; + u8 grh[0x1]; + u8 reserved_3[0x2]; + u8 src_addr_index[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; +}; + +struct mlx5_ifc_pages_req_event_bits { + u8 reserved_0[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; + + u8 reserved_1[0xa0]; +}; + +struct mlx5_ifc_eqe_bits { + u8 reserved_0[0x8]; + u8 event_type[0x8]; + u8 reserved_1[0x8]; + u8 event_sub_type[0x8]; + + u8 reserved_2[0xe0]; + + union mlx5_ifc_event_auto_bits event_data; + + u8 reserved_3[0x10]; + u8 signature[0x8]; + u8 reserved_4[0x7]; + u8 owner[0x1]; +}; + +enum { + MLX5_CMD_QUEUE_ENTRY_TYPE_PCIE_CMD_IF_TRANSPORT = 0x7, +}; + +struct mlx5_ifc_cmd_queue_entry_bits { + u8 type[0x8]; + u8 reserved_0[0x18]; + + u8 input_length[0x20]; + + u8 input_mailbox_pointer_63_32[0x20]; + + u8 input_mailbox_pointer_31_9[0x17]; + u8 reserved_1[0x9]; + + u8 command_input_inline_data[16][0x8]; + + u8 command_output_inline_data[16][0x8]; + + u8 output_mailbox_pointer_63_32[0x20]; + + u8 output_mailbox_pointer_31_9[0x17]; + u8 reserved_2[0x9]; + + u8 output_length[0x20]; + + u8 token[0x8]; + u8 signature[0x8]; + u8 reserved_3[0x8]; + u8 status[0x7]; + u8 ownership[0x1]; +}; + +struct mlx5_ifc_cmd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 command_output[0x20]; +}; + +struct mlx5_ifc_cmd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 command[0][0x20]; +}; + +struct mlx5_ifc_cmd_if_box_bits { + u8 mailbox_data[512][0x8]; + + u8 reserved_0[0x180]; + + u8 next_pointer_63_32[0x20]; + + u8 next_pointer_31_10[0x16]; + u8 reserved_1[0xa]; + + u8 block_number[0x20]; + + u8 reserved_2[0x8]; + u8 token[0x8]; + u8 ctrl_signature[0x8]; + u8 signature[0x8]; +}; + +struct mlx5_ifc_mtt_bits { + u8 ptag_63_32[0x20]; + + u8 ptag_31_8[0x18]; + u8 reserved_0[0x6]; + u8 wr_en[0x1]; + u8 rd_en[0x1]; +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR = 0x1, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC = 0x7, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR = 0x8, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR = 0x9, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR = 0xa, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR = 0xb, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN = 0xc, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR = 0xd, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, +}; + +struct mlx5_ifc_initial_seg_bits { + u8 fw_rev_minor[0x10]; + u8 fw_rev_major[0x10]; + + u8 cmd_interface_rev[0x10]; + u8 fw_rev_subminor[0x10]; + + u8 reserved_0[0x40]; + + u8 cmdq_phy_addr_63_32[0x20]; + + u8 cmdq_phy_addr_31_12[0x14]; + u8 reserved_1[0x2]; + u8 nic_interface[0x2]; + u8 log_cmdq_size[0x4]; + u8 log_cmdq_stride[0x4]; + + u8 command_doorbell_vector[0x20]; + + u8 reserved_2[0xf00]; + + u8 initializing[0x1]; + u8 reserved_3[0x4]; + u8 nic_interface_supported[0x3]; + u8 reserved_4[0x18]; + + struct mlx5_ifc_health_buffer_bits health_buffer; + + u8 no_dram_nic_offset[0x20]; + + u8 reserved_5[0x6e40]; + + u8 reserved_6[0x1f]; + u8 clear_int[0x1]; + + u8 health_syndrome[0x8]; + u8 health_counter[0x18]; + + u8 reserved_7[0x17fc0]; +}; + +union mlx5_ifc_ports_control_registers_document_bits { + struct mlx5_ifc_bufferx_reg_bits bufferx_reg; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping; + struct mlx5_ifc_pamp_reg_bits pamp_reg; + struct mlx5_ifc_paos_reg_bits paos_reg; + struct mlx5_ifc_pcap_reg_bits pcap_reg; + struct mlx5_ifc_peir_reg_bits peir_reg; + struct mlx5_ifc_pelc_reg_bits pelc_reg; + struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + struct mlx5_ifc_pifr_reg_bits pifr_reg; + struct mlx5_ifc_pipg_reg_bits pipg_reg; + struct mlx5_ifc_plbf_reg_bits plbf_reg; + struct mlx5_ifc_plib_reg_bits plib_reg; + struct mlx5_ifc_plpc_reg_bits plpc_reg; + struct mlx5_ifc_pmaos_reg_bits pmaos_reg; + struct mlx5_ifc_pmlp_reg_bits pmlp_reg; + struct mlx5_ifc_pmlpn_reg_bits pmlpn_reg; + struct mlx5_ifc_pmpc_reg_bits pmpc_reg; + struct mlx5_ifc_pmpe_reg_bits pmpe_reg; + struct mlx5_ifc_pmpr_reg_bits pmpr_reg; + struct mlx5_ifc_pmtu_reg_bits pmtu_reg; + struct mlx5_ifc_ppad_reg_bits ppad_reg; + struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_pplm_reg_bits pplm_reg; + struct mlx5_ifc_pplr_reg_bits pplr_reg; + struct mlx5_ifc_ppsc_reg_bits ppsc_reg; + struct mlx5_ifc_pqdr_reg_bits pqdr_reg; + struct mlx5_ifc_pspa_reg_bits pspa_reg; + struct mlx5_ifc_ptas_reg_bits ptas_reg; + struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_pude_reg_bits pude_reg; + struct mlx5_ifc_pvlc_reg_bits pvlc_reg; + struct mlx5_ifc_slrg_reg_bits slrg_reg; + struct mlx5_ifc_sltp_reg_bits sltp_reg; + u8 reserved_0[0x60e0]; +}; + +union mlx5_ifc_debug_enhancements_document_bits { + struct mlx5_ifc_health_buffer_bits health_buffer; + u8 reserved_0[0x200]; +}; + +union mlx5_ifc_uplink_pci_interface_document_bits { + struct mlx5_ifc_initial_seg_bits initial_seg; + u8 reserved_0[0x20060]; }; #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 310b5f7fd6ae..f079fb1a31f7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -134,13 +134,21 @@ enum { enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, }; enum { + MLX5_SEND_WQE_DS = 16, MLX5_SEND_WQE_BB = 64, }; +#define MLX5_SEND_WQEBB_NUM_DS (MLX5_SEND_WQE_BB / MLX5_SEND_WQE_DS) + +enum { + MLX5_SEND_WQE_MAX_WQEBBS = 16, +}; + enum { MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, @@ -200,6 +208,23 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 +enum { + MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, + MLX5_ETH_WQE_L4_INNER_CSUM = 1 << 5, + MLX5_ETH_WQE_L3_CSUM = 1 << 6, + MLX5_ETH_WQE_L4_CSUM = 1 << 7, +}; + +struct mlx5_wqe_eth_seg { + u8 rsvd0[4]; + u8 cs_flags; + u8 rsvd1; + __be16 mss; + __be32 rsvd2; + __be16 inline_hdr_sz; + u8 inline_hdr_start[2]; +}; + struct mlx5_wqe_xrc_seg { __be32 xrc_srqn; u8 rsvd[12]; -- cgit v1.2.3 From 938fe83c8dcbbf294d167e6163200a8540ae43c4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:41 +0300 Subject: net/mlx5_core: New device capabilities handling - Query all supported types of dev caps on driver load. - Store the Cap data outbox per cap type into driver private data. - Introduce new Macros to access/dump stored caps (using the auto generated data types). - Obsolete SW representation of dev caps (no need for SW copy for each cap). - Modify IB driver to use new macros for checking caps. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 66 ++++++++++++++++++++++++++++++++++++++++----- include/linux/mlx5/driver.h | 58 +++++---------------------------------- 2 files changed, 65 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index feebed7b392b..4ee52bf1f959 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,8 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) +#define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) @@ -322,13 +324,6 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; -enum { - HCA_CAP_OPMOD_GET_MAX = 0, - HCA_CAP_OPMOD_GET_CUR = 1, - HCA_CAP_OPMOD_GET_ODP_MAX = 4, - HCA_CAP_OPMOD_GET_ODP_CUR = 5 -}; - struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; @@ -1101,4 +1096,61 @@ enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, }; +/* MLX5 DEV CAPs */ + +/* TODO: EAT.ME */ +enum mlx5_cap_mode { + HCA_CAP_OPMOD_GET_MAX = 0, + HCA_CAP_OPMOD_GET_CUR = 1, +}; + +enum mlx5_cap_type { + MLX5_CAP_GENERAL = 0, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_EOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + /* NUM OF CAP Types */ + MLX5_CAP_NUM +}; + +/* GET Dev Caps macros */ +#define MLX5_CAP_GEN(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_GEN_MAX(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_ETH(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ETH_MAX(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ROCE(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ROCE_MAX(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ATOMIC(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_ODP(mdev, cap)\ + MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fd4fdc1ba16..6b9199163633 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -268,55 +268,7 @@ struct mlx5_cmd { struct mlx5_port_caps { int gid_table_len; int pkey_table_len; -}; - -struct mlx5_general_caps { - u8 log_max_eq; - u8 log_max_cq; - u8 log_max_qp; - u8 log_max_mkey; - u8 log_max_pd; - u8 log_max_srq; - u8 log_max_mrw_sz; - u8 log_max_bsf_list_size; - u8 log_max_klm_list_size; - u32 max_cqes; - int max_wqes; - u32 max_eqes; - u32 max_indirection; - int max_sq_desc_sz; - int max_rq_desc_sz; - int max_dc_sq_desc_sz; - u64 flags; - u16 stat_rate_support; - int log_max_msg; - int num_ports; - u8 log_max_ra_res_qp; - u8 log_max_ra_req_qp; - int max_srq_wqes; - int bf_reg_size; - int bf_regs_per_page; - struct mlx5_port_caps port[MLX5_MAX_PORTS]; - u8 ext_port_cap[MLX5_MAX_PORTS]; - int max_vf; - u32 reserved_lkey; - u8 local_ca_ack_delay; - u8 log_max_mcg; - u32 max_qp_mcg; - int min_page_sz; - int pd_cap; - u32 max_qp_counters; - u32 pkey_table_size; - u8 log_max_ra_req_dc; - u8 log_max_ra_res_dc; - u32 uar_sz; - u8 min_log_pg_sz; - u8 log_max_xrcd; - u16 log_uar_page_sz; -}; - -struct mlx5_caps { - struct mlx5_general_caps gen; + u8 ext_port_cap; }; struct mlx5_cmd_mailbox { @@ -521,7 +473,9 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_caps caps; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; void (*event) (struct mlx5_core_dev *dev, @@ -651,8 +605,8 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod); +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, -- cgit v1.2.3 From adb0c9545bce6f1b1d563e988e6ee5531861d449 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:42 +0300 Subject: net/mlx5_core: Implement access functions of ptys register fields Those registers will be used by the ethtool to set/get settings. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6b9199163633..266d5498a270 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -504,6 +504,11 @@ enum { MLX5_COMP_EQ_SIZE = 1024, }; +enum { + MLX5_PTYS_IB = 1 << 0, + MLX5_PTYS_EN = 1 << 2, +}; + struct mlx5_db_pgdir { struct list_head list; DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); @@ -686,7 +691,16 @@ void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask); +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask); +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask); +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 4c916a798058c1acf5a980438416020932c24aca Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:43 +0300 Subject: net/mlx5_core: Implement get/set port status Implemet get/set port status low level functions to be exposed by the netdev. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 266d5498a270..6438444ab361 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -149,6 +149,11 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_CLIENT_REREG, }; +enum mlx5_port_status { + MLX5_PORT_UP = 1 << 1, + MLX5_PORT_DOWN = 1 << 2, +}; + struct mlx5_uuar_info { struct mlx5_uar *uars; int num_uars; @@ -701,6 +706,9 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +int mlx5_set_port_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 90b3e38d048f09b22fb50bcd460cea65fd00b2d7 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:44 +0300 Subject: net/mlx5_core: Modify CQ moderation parameters Introduce mlx5_core_modify_cq_moderation() to be used by the netdev, to set hardware coalescing. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/cq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2695ced222df..abc4767695e4 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -169,6 +169,9 @@ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_query_cq_mbox_out *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_modify_cq_mbox_in *in, int in_sz); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, u16 cq_period, + u16 cq_max_count); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From e725440e75da8c4d617a31c4e38216acc55c24e3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:45 +0300 Subject: net/mlx5_core: Set/Query port MTU commands Introduce set/Query low level functions to access MTU in hardware. To be used by the netdev. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6438444ab361..51738472657e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -710,6 +710,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); + int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, -- cgit v1.2.3 From afb736e9330ad6b2b6935d2f53ded784eb73f12d Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:47 +0300 Subject: net/mlx5: Ethernet resource handling files This patch contains the resource handling files: - flow_table.c: This file contains the code to handle the low level API to configure hardware flow table. It is separated from the flow_table_en.c, because it will be used in the future by Raw Ethernet QP in mlx5_ib too. - en_flow_table.[ch]: Ethernet flow steering handling. The flow table object contain a mapping between flow specs and TIRs. This mechanism will be used also to configure e-switch in the future, when SR-IOV support will be added. - transobj.[ch] - Low level functions to create/modify/destroy the transport objects: RQ/SQ/TIR/TIS - vport.[ch] - Handle attributes of a virtual port (vPort) in the embedded switch. Currently this switch is a passthrough, until SR-IOV support will be added. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/flow_table.h | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/linux/mlx5/flow_table.h (limited to 'include/linux') diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h new file mode 100644 index 000000000000..5f922c6d4fc2 --- /dev/null +++ b/include/linux/mlx5/flow_table.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_FLOW_TABLE_H +#define MLX5_FLOW_TABLE_H + +#include + +struct mlx5_flow_table_group { + u8 log_sz; + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group); +void mlx5_destroy_flow_table(void *flow_table); +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index); +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index); +u32 mlx5_get_flow_table_id(void *flow_table); + +#endif /* MLX5_FLOW_TABLE_H */ -- cgit v1.2.3 From f62b8bb8f2d30582f30f51e85a8c0e1260125d7e Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:48 +0300 Subject: net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality This is the Ethernet part of the driver for the Mellanox ConnectX(R)-4 Single/Dual-Port Adapter supporting 100Gb/s with VPI. The driver extends the existing mlx5 driver with Ethernet functionality. This patch contains the driver entry points but does not include transmit and receive (see the previous patch in the series) routines. It also adds the option MLX5_CORE_EN to Kconfig to enable/disable the Ethernet functionality. Currently, Kconfig is programmed to make Ethernet and Infiniband functionality mutally exclusive. Also changed MLX5_INFINIBAND to be depandant on MLX5_CORE instead of selecting it, since MLX5_CORE could be selected without MLX5_INFINIBAND being selected. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 19 +++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4ee52bf1f959..b288c538347a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1153,4 +1153,23 @@ enum mlx5_cap_type { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) +enum { + MLX5_CMD_STAT_OK = 0x0, + MLX5_CMD_STAT_INT_ERR = 0x1, + MLX5_CMD_STAT_BAD_OP_ERR = 0x2, + MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, + MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, + MLX5_CMD_STAT_BAD_RES_ERR = 0x5, + MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_LIM_ERR = 0x8, + MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, + MLX5_CMD_STAT_IX_ERR = 0xa, + MLX5_CMD_STAT_NO_RES_ERR = 0xf, + MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, + MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, + MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, + MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, + MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 51738472657e..7fa26f03acc1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -489,6 +489,7 @@ struct mlx5_core_dev { struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; + u32 issi; }; struct mlx5_db { -- cgit v1.2.3 From 07d783fd830a49008f3b2764ae7b6033ee1bf329 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 19 May 2015 11:44:46 +0200 Subject: staging: goldfish: Fix pointer cast for 32 bits As the first argument of gf_write64() was of type unsigned long, and as some calls to gf_write64() were casting the first argument from void * to u64 the compiler and/or sparse were printing warnings for casts of wrong sizes when compiling for i386. This patch changes the type of the first argument of gf_write64() to const void *, and update calls to the function. This change fixed the warnings and allowed to remove casts from 3 calls to gf_write64(). In addition gf_write64() was renamed to gf_write_ptr() as the name was misleading because it only writes 32 bits on 32 bit systems. gf_write_dma_addr() was added to handle dma_addr_t values which is used at drivers/staging/goldfish/goldfish_audio.c. Signed-off-by: Dan Carpenter Signed-off-by: Peter Senna Tschudin Signed-off-by: Greg Kroah-Hartman --- include/linux/goldfish.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h index 569236e6b2bc..93e080b39cf6 100644 --- a/include/linux/goldfish.h +++ b/include/linux/goldfish.h @@ -3,13 +3,24 @@ /* Helpers for Goldfish virtual platform */ -static inline void gf_write64(unsigned long data, - void __iomem *portl, void __iomem *porth) +static inline void gf_write_ptr(const void *ptr, void __iomem *portl, + void __iomem *porth) { - writel((u32)data, portl); + writel((u32)(unsigned long)ptr, portl); #ifdef CONFIG_64BIT - writel(data>>32, porth); + writel((unsigned long)ptr >> 32, porth); #endif } +static inline void gf_write_dma_addr(const dma_addr_t addr, + void __iomem *portl, + void __iomem *porth) +{ + writel((u32)addr, portl); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + writel(addr >> 32, porth); +#endif +} + + #endif /* __LINUX_GOLDFISH_H */ -- cgit v1.2.3 From b144ce2d37619e05afdb0a15676500d76a64b1be Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 May 2015 17:17:27 -0700 Subject: mei: fix up uuid matching A previous commit, c93b76b34b4d ("mei: bus: report also uuid in module alias") caused a build error as I missed applying a needed patch to add some macros to uapi/linux/uuid.h. Instead of those additional macros, change the mei code to use the existing uuid structure directly. Fixes: c93b76b34b4d Cc: Tomas Winkler Cc: Samuel Ortiz Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 2d2b2b571d61..048c270822f9 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -614,7 +614,7 @@ struct ipack_device_id { */ struct mei_cl_device_id { char name[MEI_CL_NAME_SIZE]; - __u8 uuid[16]; + uuid_le uuid; kernel_ulong_t driver_info; }; -- cgit v1.2.3 From 10081fb532a2a2216b7d8e4ad585c985075b6f60 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:50 +0900 Subject: lib: introduce crc_t10dif_update() This introduces crc_t10dif_update() which enables to calculate CRC for a block which straddles multiple SG elements by calling multiple times. This also converts crc_t10dif() to use crc_t10dif_update() as they are almost same. (remove extra function call in crc_t10dif() and crc_t10dif_update - Tim + Herbert) Signed-off-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Tim Chen Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Signed-off-by: Nicholas Bellinger --- include/linux/crc-t10dif.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index cf53d0773ce3..d81961e9e37d 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -9,5 +9,6 @@ extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); extern __u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); #endif -- cgit v1.2.3 From c66fa19c405a36673d4aab13658c8246413d5c0f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 31 May 2015 09:30:16 +0300 Subject: net/mlx4: Add EQ pool Previously, mlx4_en allocated EQs and used them exclusively. This affected RoCE performance, as applications which are events sensitive were limited to use only the legacy EQs. Change that by introducing an EQ pool. This pool is managed by mlx4_core. EQs are assigned to ports (when there are limited number of EQs, multiple ports could be assigned to the same EQs). An exception to this rule is the ASYNC EQ which handles various events. Legacy EQs are completely removed as all EQs could be shared. When a consumer (mlx4_ib/mlx4_en) requests an EQ, it asks for EQ serving on a specific port. The core driver calculates which EQ should be assigned to that request. Because IRQs are shared between IB and Ethernet modules, their names only include the PCI device BDF address. Signed-off-by: Matan Barak Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83e80ab94500..ad31e476873f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -46,8 +46,9 @@ #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 -#define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 +#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ + (dev_cap).num_ports * MIN_MSIX_P_PORT) #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values @@ -528,7 +529,6 @@ struct mlx4_caps { int num_eqs; int reserved_eqs; int num_comp_vectors; - int comp_pool; int num_mpts; int max_fmr_maps; int num_mtts; @@ -1332,10 +1332,13 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector); +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port); +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector); int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3 From f0e6a326deec8b51ee12f82a34057efd3d0979b8 Mon Sep 17 00:00:00 2001 From: Abhishek Bist Date: Wed, 27 May 2015 23:54:19 +0530 Subject: USB: hcd.h : Removed an unnecessary function prototype usb_find_interface_driver() This function is used to call in early version of linux kernel in order to find out the interface used by a usb device. But now it's use is completely abolished. So,it would be relevant to remove this obselete function from kernel mainline. Signed-off-by: Abhishek Bist Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 68b1e836dff1..c9aa7792de10 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -622,8 +622,6 @@ extern struct list_head usb_bus_list; extern struct mutex usb_bus_list_lock; extern wait_queue_head_t usb_kill_urb_queue; -extern int usb_find_interface_driver(struct usb_device *dev, - struct usb_interface *interface); #define usb_endpoint_out(ep_dir) (!((ep_dir) & USB_DIR_IN)) -- cgit v1.2.3 From 138c3f03b017e261316a4f1ec793e1ff74516def Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Tue, 26 May 2015 17:15:29 +0530 Subject: drivers:usb:fsl: Add support for USB controller version-2.5 Add support for USB controller version-2.5 used in T4240 rev2.0, T1024, T1040, T2080, LS1021A Signed-off-by: Nikhil Badola Signed-off-by: Greg Kroah-Hartman --- include/linux/fsl_devices.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index a82296af413f..2a2f56b292c1 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -24,6 +24,7 @@ #define FSL_USB_VER_1_6 1 #define FSL_USB_VER_2_2 2 #define FSL_USB_VER_2_4 3 +#define FSL_USB_VER_2_5 4 #include -- cgit v1.2.3 From 34d2e4584ae594eff29d1595d47d7d044e57f834 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 May 2015 13:28:48 +0900 Subject: serial: 8250: include from serial_8250.h The header file, include/linux/serial_8250.h, contains references to UART_LSR_BRK_ERROR_BITS and UART_MSR_ANY_DELTA that are defined in . Signed-off-by: Masahiro Yamada Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index f0c68d88b6f4..ba82c07feb95 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -12,6 +12,7 @@ #define _LINUX_SERIAL_8250_H #include +#include #include /* -- cgit v1.2.3 From abf2e7d6e2e315b32ee00067a69aaad2cf4e1b3f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 28 May 2015 19:26:02 -0700 Subject: bpf: add missing rcu protection when releasing programs from prog_array Normally the program attachment place (like sockets, qdiscs) takes care of rcu protection and calls bpf_prog_put() after a grace period. The programs stored inside prog_array may not be attached anywhere, so prog_array needs to take care of preserving rcu protection. Otherwise bpf_tail_call() will race with bpf_prog_put(). To solve that introduce bpf_prog_put_rcu() helper function and use it in 3 places where unattached program can decrement refcnt: closing program fd, deleting/replacing program in prog_array. Fixes: 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs") Reported-by: Martin Schwidefsky Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8821b9a8689e..5f520f5f087e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -123,7 +123,10 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct bpf_array { @@ -153,6 +156,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); +void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); -- cgit v1.2.3 From 3b369bd212d5cabb46cff0e863298971b382bbd6 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:32 +0300 Subject: ieee802154: Fix generation of random EUI-64 addresses. Currently, ieee802154_random_extended_addr() has a 50% chance of generating a group (multicast) address, while this function is used for generating station addresses (which can't be group addresses) for interfaces that don't have a hardware-provided address. Also, in case get_random_bytes() generates the EUI-64 address 00:00:00:00:00:00:00:00 (extremely unlikely), which is an invalid address, ieee802154_random_extended_addr() reacts by changing it to 01:00:00:00:00:00:00:00, which is an invalid station address as well, as it is a group address. This patch changes the address generation procedure to grab eight random bytes, treat that as an EUI-64, and then clear the Group address bit and set the Locally Administered bit, which is in line with how eth_random_addr() generates random EUI-48s. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 8872ca103d06..552210d0a46f 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -244,9 +244,9 @@ static inline void ieee802154_random_extended_addr(__le64 *addr) { get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); - /* toggle some bit if we hit an invalid extended addr */ - if (!ieee802154_is_valid_extended_addr(*addr)) - ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] ^= 0x01; + /* clear the group bit, and set the locally administered bit */ + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01; + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02; } #endif /* LINUX_IEEE802154_H */ -- cgit v1.2.3 From daf4e2c89254ed6eb8cf7ef60f614edebfdb9f3a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:43 +0300 Subject: ieee802154: Fix EUI-64 station address validation. Refuse to allow setting an EUI-64 group address as an interface address, as those are not valid station addresses. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 552210d0a46f..1dc1f4ed4001 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -225,15 +225,13 @@ static inline bool ieee802154_is_valid_psdu_len(const u8 len) * ieee802154_is_valid_psdu_len - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) { - /* These EUI-64 addresses are reserved by IEEE. 0xffffffffffffffff - * is used internally as extended to short address broadcast mapping. - * This is currently a workaround because neighbor discovery can't - * deal with short addresses types right now. + /* Bail out if the address is all zero, or if the group + * address bit is set. */ return ((addr != cpu_to_le64(0x0000000000000000ULL)) && - (addr != cpu_to_le64(0xffffffffffffffffULL))); + !(addr & cpu_to_le64(0x0100000000000000ULL))); } /** -- cgit v1.2.3 From 1a1bc59c5f7657387d1a4b45d63248fed55ab88c Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 29 May 2015 10:56:55 +0530 Subject: cc2520: fix CC2591 handling This patch changes tha way of handling of cc2591-cc2520 combination by moving amplified variable from platform data to private data. This will be useful in other sections like tx power support. Signed-off-by: Varka Bhadram Cc: Brad Campbell Signed-off-by: Marcel Holtmann --- include/linux/spi/cc2520.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index e741e8baad92..85b8ee67e937 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,7 +21,6 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; - bool amplified; }; #endif -- cgit v1.2.3 From 6c4e5f9c9ff41ea997fd0f345b3b2b88c113eb68 Mon Sep 17 00:00:00 2001 From: Keith Mange Date: Tue, 26 May 2015 14:23:01 -0700 Subject: Drivers: hv: vmbus:Update preferred vmbus protocol version to windows 10. Add support for Windows 10. Signed-off-by: Keith Mange Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3932a993ff5a..4317cd1b69ed 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -160,16 +160,18 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, * 1 . 1 (Windows 7) * 2 . 4 (Windows 8) * 3 . 0 (Windows 8 R2) + * 4 . 0 (Windows 10) */ #define VERSION_WS2008 ((0 << 16) | (13)) #define VERSION_WIN7 ((1 << 16) | (1)) #define VERSION_WIN8 ((2 << 16) | (4)) #define VERSION_WIN8_1 ((3 << 16) | (0)) +#define VERSION_WIN10 ((4 << 16) | (0)) #define VERSION_INVAL -1 -#define VERSION_CURRENT VERSION_WIN8_1 +#define VERSION_CURRENT VERSION_WIN10 /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) -- cgit v1.2.3 From 6fa45a22689722dac9f0e90c0931d4b34b334ede Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 20 May 2015 20:56:57 +0530 Subject: parport: add device-model to parport subsystem parport subsystem starts using the device-model. Drivers using the device-model has to define devmodel as true and should register the device with parport using parport_register_dev_model(). Tested-by: Jean Delvare Tested-by: Alan Cox Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- include/linux/parport.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index c22f12547324..58e3c64c6b49 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,8 @@ struct pardevice { unsigned int flags; struct pardevice *next; struct pardevice *prev; + struct device dev; + bool devmodel; struct parport_state *state; /* saved status over preemption */ wait_queue_head_t wait_q; unsigned long int time; @@ -156,6 +159,8 @@ struct pardevice { void * sysctl_table; }; +#define to_pardevice(n) container_of(n, struct pardevice, dev) + /* IEEE1284 information */ /* IEEE1284 phases. These are exposed to userland through ppdev IOCTL @@ -195,7 +200,7 @@ struct parport { * This may unfortulately be null if the * port has a legacy driver. */ - + struct device bus_dev; /* to link with the bus */ struct parport *physport; /* If this is a non-default mux parport, i.e. we're a clone of a real @@ -245,15 +250,26 @@ struct parport { struct parport *slaves[3]; }; +#define to_parport_dev(n) container_of(n, struct parport, bus_dev) + #define DEFAULT_SPIN_TIME 500 /* us */ struct parport_driver { const char *name; void (*attach) (struct parport *); void (*detach) (struct parport *); + void (*match_port)(struct parport *); + int (*probe)(struct pardevice *); + struct device_driver driver; + bool devmodel; struct list_head list; }; +#define to_parport_driver(n) container_of(n, struct parport_driver, driver) + +int parport_bus_init(void); +void parport_bus_exit(void); + /* parport_register_port registers a new parallel port at the given address (if one does not already exist) and returns a pointer to it. This entails claiming the I/O region, IRQ and DMA. NULL is returned @@ -272,10 +288,20 @@ void parport_announce_port (struct parport *port); extern void parport_remove_port(struct parport *port); /* Register a new high-level driver. */ -extern int parport_register_driver (struct parport_driver *); + +int __must_check __parport_register_driver(struct parport_driver *, + struct module *, + const char *mod_name); +/* + * parport_register_driver must be a macro so that KBUILD_MODNAME can + * be expanded + */ +#define parport_register_driver(driver) \ + __parport_register_driver(driver, THIS_MODULE, KBUILD_MODNAME) /* Unregister a high-level driver. */ extern void parport_unregister_driver (struct parport_driver *); +void parport_unregister_driver(struct parport_driver *); /* If parport_register_driver doesn't fit your needs, perhaps * parport_find_xxx does. */ @@ -288,6 +314,15 @@ extern irqreturn_t parport_irq_handler(int irq, void *dev_id); /* Reference counting for ports. */ extern struct parport *parport_get_port (struct parport *); extern void parport_put_port (struct parport *); +void parport_del_port(struct parport *); + +struct pardev_cb { + int (*preempt)(void *); + void (*wakeup)(void *); + void *private; + void (*irq_func)(void *); + unsigned int flags; +}; /* parport_register_device declares that a device is connected to a port, and tells the kernel all it needs to know. @@ -301,6 +336,10 @@ struct pardevice *parport_register_device(struct parport *port, void (*irq_func)(void *), int flags, void *handle); +struct pardevice * +parport_register_dev_model(struct parport *port, const char *name, + const struct pardev_cb *par_dev_cb, int cnt); + /* parport_unregister unlinks a device from the chain. */ extern void parport_unregister_device(struct pardevice *dev); -- cgit v1.2.3 From b84b1d522f979fb53ad347605e24b2940fa2ad99 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 29 Apr 2015 08:57:34 +0200 Subject: scsi: Do not set cmd_per_lun to 1 in the host template '0' is now used as the default cmd_per_lun value, so there's no need to explicitly set it to '1' in the host template. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: James Bottomley --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8dad4a307bb8..1402291aab5e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -134,7 +134,6 @@ enum { ATA_ALL_DEVICES = (1 << ATA_MAX_DEVICES) - 1, ATA_SHT_EMULATED = 1, - ATA_SHT_CMD_PER_LUN = 1, ATA_SHT_THIS_ID = -1, ATA_SHT_USE_CLUSTERING = 1, @@ -1354,7 +1353,6 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ - .cmd_per_lun = ATA_SHT_CMD_PER_LUN, \ .emulated = ATA_SHT_EMULATED, \ .use_clustering = ATA_SHT_USE_CLUSTERING, \ .proc_name = drv_name, \ -- cgit v1.2.3 From 1c4b1d73bacc546ba4e42f7eb4cb88c54139820b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 27 May 2015 13:57:46 +0200 Subject: tty: move linux/gsmmux.h to uapi linux/gsmmux.h defines a user interface and therefore should be installed with other headers. Make the file include: * linux/if.h for IFNAMSIZ * linux/ioctl.h for _IO* macros Signed-off-by: Jiri Slaby Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/gsmmux.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 include/linux/gsmmux.h (limited to 'include/linux') diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h deleted file mode 100644 index c25e9477f7c3..000000000000 --- a/include/linux/gsmmux.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _LINUX_GSMMUX_H -#define _LINUX_GSMMUX_H - -struct gsm_config -{ - unsigned int adaption; - unsigned int encapsulation; - unsigned int initiator; - unsigned int t1; - unsigned int t2; - unsigned int t3; - unsigned int n2; - unsigned int mru; - unsigned int mtu; - unsigned int k; - unsigned int i; - unsigned int unused[8]; /* Padding for expansion without - breaking stuff */ -}; - -#define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config) -#define GSMIOC_SETCONF _IOW('G', 1, struct gsm_config) - -struct gsm_netconfig { - unsigned int adaption; /* Adaption to use in network mode */ - unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */ - unsigned short unused2; - char if_name[IFNAMSIZ]; /* interface name format string */ - __u8 unused[28]; /* For future use */ -}; - -#define GSMIOC_ENABLE_NET _IOW('G', 2, struct gsm_netconfig) -#define GSMIOC_DISABLE_NET _IO('G', 3) - - -#endif -- cgit v1.2.3 From 1f656ff3fdddc2f59649cc84b633b799908f1f7b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 30 May 2015 23:37:48 -0700 Subject: Drivers: hv: vmbus: Implement NUMA aware CPU affinity for channels Channels/sub-channels can be affinitized to VCPUs in the guest. Implement this affinity in a way that is NUMA aware. The current protocol distributed the primary channels uniformly across all available CPUs. The new protocol is NUMA aware: primary channels are distributed across the available NUMA nodes while the sub-channels within a primary channel are distributed amongst CPUs within the NUMA node assigned to the primary channel. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 4317cd1b69ed..30d3a1f79450 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -696,6 +696,11 @@ struct vmbus_channel { u32 target_vp; /* The corresponding CPUID in the guest */ u32 target_cpu; + /* + * State to manage the CPU affiliation of channels. + */ + struct cpumask alloced_cpus_in_node; + int numa_node; /* * Support for sub-channels. For high performance devices, * it will be useful to have multiple sub-channels to support -- cgit v1.2.3 From 17ca8cbf49be3aa94bb1c2b7ee6545fd70094eb4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2015 23:23:06 +0200 Subject: ebpf: allow bpf_ktime_get_ns_proto also for networking As this is already exported from tracing side via commit d9847d310ab4 ("tracing: Allow BPF programs to call bpf_ktime_get_ns()"), we might as well want to move it to the core, so also networking users can make use of it, e.g. to measure diffs for certain flows from ingress/egress. Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Ingo Molnar Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5f520f5f087e..ca854e5bb2f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,5 +186,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; +extern const struct bpf_func_proto bpf_ktime_get_ns_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 887ee43477e4e327dbcd2aabc2d78a5116ed8a33 Mon Sep 17 00:00:00 2001 From: Beomho Seo Date: Thu, 30 Apr 2015 13:07:43 +0900 Subject: hwmon: (ntc_thermistor) Add support for ncpXXwf104 This patch adds support for the ntc thermistor NCPXXWF104 series. Cc: Jean Delvare Cc: Guenter Roeck Signed-off-by: Beomho Seo Signed-off-by: Guenter Roeck --- include/linux/platform_data/ntc_thermistor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ntc_thermistor.h b/include/linux/platform_data/ntc_thermistor.h index 0a6de4ca4930..aed170588b74 100644 --- a/include/linux/platform_data/ntc_thermistor.h +++ b/include/linux/platform_data/ntc_thermistor.h @@ -27,6 +27,7 @@ enum ntc_thermistor_type { TYPE_NCPXXWB473, TYPE_NCPXXWL333, TYPE_B57330V2103, + TYPE_NCPXXWF104, }; struct ntc_thermistor_platform_data { -- cgit v1.2.3 From dfa13ebbe3340e538b988f5608efd9ff2ca7fc35 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 May 2015 13:10:12 +0300 Subject: mmc: host: Add facility to support re-tuning Currently, there is core support for tuning during initialization. There can also be a need to re-tune periodically (e.g. sdhci) or to re-tune after the host controller is powered off (e.g. after PM runtime suspend / resume) or to re-tune in response to CRC errors. The main requirements for re-tuning are: - ability to enable / disable re-tuning - ability to flag that re-tuning is needed - ability to re-tune before any request - ability to hold off re-tuning if the card is busy - ability to hold off re-tuning if re-tuning is in progress - ability to run a re-tuning timer To support those requirements 7 members are added to struct mmc_host: unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ int need_retune; /* re-tuning is needed */ int hold_retune; /* hold off re-tuning */ unsigned int retune_period; /* re-tuning period in secs */ struct timer_list retune_timer; /* for periodic re-tuning */ need_retune is an integer so it can be set without needing synchronization. hold_retune is a integer to allow nesting. Various simple functions are provided to set / clear those variables. Subsequent patches take those functions into use. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index b5bedaec6223..f471193ef6d6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -321,10 +322,18 @@ struct mmc_host { #ifdef CONFIG_MMC_DEBUG unsigned int removed:1; /* host is being removed */ #endif + unsigned int can_retune:1; /* re-tuning can be used */ + unsigned int doing_retune:1; /* re-tuning in progress */ + unsigned int retune_now:1; /* do re-tuning at next req */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ + int need_retune; /* re-tuning is needed */ + int hold_retune; /* hold off re-tuning */ + unsigned int retune_period; /* re-tuning period in secs */ + struct timer_list retune_timer; /* for periodic re-tuning */ + bool trigger_card_event; /* card_event necessary */ struct mmc_card *card; /* device attached to this host */ @@ -513,4 +522,18 @@ static inline bool mmc_card_hs400(struct mmc_card *card) return card->host->ios.timing == MMC_TIMING_MMC_HS400; } +void mmc_retune_timer_stop(struct mmc_host *host); + +static inline void mmc_retune_needed(struct mmc_host *host) +{ + if (host->can_retune) + host->need_retune = 1; +} + +static inline void mmc_retune_recheck(struct mmc_host *host) +{ + if (host->hold_retune <= 1) + host->retune_now = 1; +} + #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From 9f6e0bff2afb52a4c29f5ca8a4db01810357974e Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 6 May 2015 20:31:19 +0200 Subject: mmc: Add support for disabling write-protect detection It is not uncommon to see systems where there is no physical write-protect signal (e.g. when using eMMC or microSD card slots). For some controllers, which have a dedicated write-protection detection logic (like SDHCI controllers), the get_ro() callback can return bogus data in such a case. Instead of handling this on a per controller basis this patch adds a new capability flag to the MMC core that can be set to specify that the result of get_ro() is invalid. When the flag is set the core will not call get_ro() and assume that the card is always read-write. Signed-off-by: Lars-Peter Clausen Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f471193ef6d6..433eccb50838 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -286,6 +286,7 @@ struct mmc_host { MMC_CAP2_HS400_1_2V) #define MMC_CAP2_HSX00_1_2V (MMC_CAP2_HS200_1_2V_SDR | MMC_CAP2_HS400_1_2V) #define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17) +#define MMC_CAP2_NO_WRITE_PROTECT (1 << 18) /* No physical write protect pin, assume that card is always read-write */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From eff8f2f5df1c509c873cdc70c84eb2ee75b41e65 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 6 May 2015 20:31:22 +0200 Subject: mmc: dw_mmc: Use core to handle absent write protect line Use the new MMC_CAP2_NO_WRITE_PROTECT to let the core handle the case where no write protect line is present instead of having custom driver code to handle it. dw_mci_of_get_slot_quirks() is slightly refactored to directly modify the mmc_host capabilities instead of returning a quirk mask. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 12111993a317..5be97676f1fa 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -226,12 +226,6 @@ struct dw_mci_dma_ops { #define DW_MCI_QUIRK_HIGHSPEED BIT(2) /* Unreliable card detection */ #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) -/* No write protect */ -#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4) - -/* Slot level quirks */ -/* This slot has no write protect */ -#define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT BIT(0) struct dma_pdata; -- cgit v1.2.3 From b4f30a174e1fda8118eda038b5d8d5260db36ad5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:12:52 +0200 Subject: mmc: core: Allow card drive strength to be different to host Initialization of UHS-I modes for SD and SDIO cards employs a callback to allow the host driver to choose a drive strength value. Currently that assumes the card drive strength and host driver type must be the same value. Change to let the callback make that decision and return both the card drive strength and host driver type. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 433eccb50838..da33d18c66c8 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -132,7 +132,8 @@ struct mmc_host_ops { /* Prepare HS400 target operating frequency depending host driver */ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); - int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); + int (*select_drive_strength)(unsigned int max_dtr, int host_drv, + int card_drv, int *drv_type); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); -- cgit v1.2.3 From f168359efbb99d6f8591bb666d6510bb78df2d07 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:12:54 +0200 Subject: mmc: core: Add 'card' to drive strength selection callback In preparation for supporting also eMMC drive strength, add the 'card' as a parameter so that the callback can distinguish different types of cards if necessary. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index da33d18c66c8..1369e54faeb7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -132,7 +132,8 @@ struct mmc_host_ops { /* Prepare HS400 target operating frequency depending host driver */ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); - int (*select_drive_strength)(unsigned int max_dtr, int host_drv, + int (*select_drive_strength)(struct mmc_card *card, + unsigned int max_dtr, int host_drv, int card_drv, int *drv_type); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); -- cgit v1.2.3 From 3853a042325e8f497c199020979c4fc824528c6e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:12:56 +0200 Subject: mmc: core: Record card drive strength In preparation for adding drive strength support for eMMC, add drive_strength to struct mmc_card to record the card drive strength for UHS-I modes and HS200 / HS400. For eMMC this will be needed when switching between HS200 and HS400. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 19f0175c0afa..2f073d555793 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -305,6 +305,7 @@ struct mmc_card { unsigned int sd_bus_speed; /* Bus Speed Mode set for the card */ unsigned int mmc_avail_type; /* supported device type by both host and card */ + unsigned int drive_strength; /* for UHS-I, HS200 or HS400 */ struct dentry *debugfs_root; struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */ -- cgit v1.2.3 From b097e07f57930eda774c83aa46e8e401686d01dc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:12:57 +0200 Subject: mmc: mmc: Read card's valid driver strength mask In preparation for supporing drive strength selection for eMMC, read the card's valid driver strengths. Note that though the SD spec uses the term "drive strength", the JEDEC eMMC spec uses the term "driver strength". Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + include/linux/mmc/mmc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 2f073d555793..4d3776d25925 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -97,6 +97,7 @@ struct mmc_ext_csd { u8 raw_erased_mem_count; /* 181 */ u8 raw_ext_csd_structure; /* 194 */ u8 raw_card_type; /* 196 */ + u8 raw_driver_strength; /* 197 */ u8 out_of_int_time; /* 198 */ u8 raw_pwr_cl_52_195; /* 200 */ u8 raw_pwr_cl_26_195; /* 201 */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 124f562118b8..4819cfbc3795 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -302,6 +302,7 @@ struct _mmc_csd { #define EXT_CSD_REV 192 /* RO */ #define EXT_CSD_STRUCTURE 194 /* RO */ #define EXT_CSD_CARD_TYPE 196 /* RO */ +#define EXT_CSD_DRIVER_STRENGTH 197 /* RO */ #define EXT_CSD_OUT_OF_INTERRUPT_TIME 198 /* RO */ #define EXT_CSD_PART_SWITCH_TIME 199 /* RO */ #define EXT_CSD_PWR_CL_52_195 200 /* RO */ -- cgit v1.2.3 From cc4f414c885cd04f7227ad9bcd6b18fd78d718d9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:12:58 +0200 Subject: mmc: mmc: Add driver strength selection Add the ability to set eMMC driver strength for HS200 and HS400. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/mmc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 4819cfbc3795..15f2c4a0a62c 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -391,6 +391,7 @@ struct _mmc_csd { #define EXT_CSD_TIMING_HS 1 /* High speed */ #define EXT_CSD_TIMING_HS200 2 /* HS200 */ #define EXT_CSD_TIMING_HS400 3 /* HS400 */ +#define EXT_CSD_DRV_STR_SHIFT 4 /* Driver Strength shift */ #define EXT_CSD_SEC_ER_EN BIT(0) #define EXT_CSD_SEC_BD_BLK_EN BIT(2) @@ -442,4 +443,6 @@ struct _mmc_csd { #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */ #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */ +#define mmc_driver_type_mask(n) (1 << (n)) + #endif /* LINUX_MMC_MMC_H */ -- cgit v1.2.3 From e1bfad6d936d7149a83423e2a7244dd5771f27e7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Feb 2015 14:13:00 +0200 Subject: mmc: sdhci-pci: Add support for drive strength selection for SPT Implement the select_drive_strength callback to provide drive strength selection for Intel SPT. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci-pci-data.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h index 8959604a13d3..fda15b6d4135 100644 --- a/include/linux/mmc/sdhci-pci-data.h +++ b/include/linux/mmc/sdhci-pci-data.h @@ -15,4 +15,6 @@ struct sdhci_pci_data { extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno); +extern int sdhci_pci_spt_drive_strength; + #endif -- cgit v1.2.3 From 225d59adf1c899176cce0fc80e42b1d1c12f109f Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 29 May 2015 18:14:21 +0200 Subject: iio: Specify supported modes for buffers For each buffer type specify the supported device modes for this buffer. This allows us for devices which support multiple different operating modes to pick the correct operating mode based on the modes supported by the attached buffers. It also prevents that buffers with conflicting modes are attached to a device at the same time or that a buffer with a non-supported mode is attached to a device (e.g. in-kernel callback buffer to a device only supporting hardware mode). Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index eb8622b78ec9..1600c55828e0 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -29,6 +29,7 @@ struct iio_buffer; * @set_length: set number of datums in buffer * @release: called when the last reference to the buffer is dropped, * should free all resources allocated by the buffer. + * @modes: Supported operating modes by this buffer type * * The purpose of this structure is to make the buffer element * modular as event for a given driver, different usecases may require @@ -51,6 +52,8 @@ struct iio_buffer_access_funcs { int (*set_length)(struct iio_buffer *buffer, int length); void (*release)(struct iio_buffer *buffer); + + unsigned int modes; }; /** -- cgit v1.2.3 From 04fba7864ffcceae8a5f78d88ae1fd8d682a5123 Mon Sep 17 00:00:00 2001 From: Goffredo Baroncelli Date: Sat, 30 May 2015 11:00:26 +0200 Subject: HID: Export hid_field_extract() Rename the function extract() to hid_field_extract(), make it external linkage to allow the use from other modules. Suggested-by: Jiri Kosina Signed-off-by: Goffredo Baroncelli Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 176b43670e5d..f17980de2662 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -815,6 +815,8 @@ void hid_disconnect(struct hid_device *hid); const struct hid_device_id *hid_match_id(struct hid_device *hdev, const struct hid_device_id *id); s32 hid_snto32(__u32 value, unsigned n); +__u32 hid_field_extract(const struct hid_device *hid, __u8 *report, + unsigned offset, unsigned n); /** * hid_device_io_start - enable HID input during probe, remove -- cgit v1.2.3 From 3fff99bc4e926d9602a7d6e8c008a0175a099ce4 Mon Sep 17 00:00:00 2001 From: Rojhalat Ibrahim Date: Wed, 13 May 2015 11:04:56 +0200 Subject: gpiolib: rename gpiod_set_array to gpiod_set_array_value There have been concerns that the function names gpiod_set_array() and gpiod_get_array() might be confusing to users. One might expect gpiod_get_array() to return array values, while it is actually the array counterpart of gpiod_get(). To be consistent with the single descriptor API we could rename gpiod_set_array() to gpiod_set_array_value(). This makes some function names a bit lengthy: gpiod_set_raw_array_value_cansleep(). Signed-off-by: Rojhalat Ibrahim Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 09a7fb0062a6..fd098169fe87 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -100,24 +100,25 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value); /* Value get/set from non-sleeping context */ int gpiod_get_value(const struct gpio_desc *desc); void gpiod_set_value(struct gpio_desc *desc, int value); -void gpiod_set_array(unsigned int array_size, - struct gpio_desc **desc_array, int *value_array); +void gpiod_set_array_value(unsigned int array_size, + struct gpio_desc **desc_array, int *value_array); int gpiod_get_raw_value(const struct gpio_desc *desc); void gpiod_set_raw_value(struct gpio_desc *desc, int value); -void gpiod_set_raw_array(unsigned int array_size, - struct gpio_desc **desc_array, int *value_array); +void gpiod_set_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); /* Value get/set from sleeping context */ int gpiod_get_value_cansleep(const struct gpio_desc *desc); void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); -void gpiod_set_array_cansleep(unsigned int array_size, - struct gpio_desc **desc_array, - int *value_array); +void gpiod_set_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc); void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); -void gpiod_set_raw_array_cansleep(unsigned int array_size, - struct gpio_desc **desc_array, - int *value_array); +void gpiod_set_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); @@ -304,9 +305,9 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_array(unsigned int array_size, - struct gpio_desc **desc_array, - int *value_array) +static inline void gpiod_set_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); @@ -322,9 +323,9 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array(unsigned int array_size, - struct gpio_desc **desc_array, - int *value_array) +static inline void gpiod_set_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); @@ -341,7 +342,7 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_array_cansleep(unsigned int array_size, +static inline void gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { @@ -360,7 +361,7 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array_cansleep(unsigned int array_size, +static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { -- cgit v1.2.3 From 5fbf65d5c9c0fd2e5c6c48d69ce34b1c5415f2fd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 22 May 2015 15:19:37 +0900 Subject: pinctrl: remove useless const qualifier This "const" claims the get_function_groups callback never changes the given num_groups pointer. It is always true in C language, so not worth mentioning. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinmux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index d3740fa7073f..ace60d775b20 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -69,7 +69,7 @@ struct pinmux_ops { int (*get_function_groups) (struct pinctrl_dev *pctldev, unsigned selector, const char * const **groups, - unsigned * const num_groups); + unsigned *num_groups); int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, -- cgit v1.2.3 From b3da97ee581387cd42dafd76eb2ac23f2335cd92 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 22 May 2015 15:25:50 +0900 Subject: pinctrl: use "const struct ..." rather than "struct ... const" Only this member, pins, is defined as "struct ... const *", but the others in this struct, pinlops, pmxops, confops, etc. are defined as "const struct ... *". Swap the "struct pinctrl_pin_desc" and "const" for consistency. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 66e4697516de..9ba59fcba549 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -127,7 +127,7 @@ struct pinctrl_ops { */ struct pinctrl_desc { const char *name; - struct pinctrl_pin_desc const *pins; + const struct pinctrl_pin_desc *pins; unsigned int npins; const struct pinctrl_ops *pctlops; const struct pinmux_ops *pmxops; -- cgit v1.2.3 From cf561f0d2eb74574ad9985a2feab134267a9d298 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:27 +0200 Subject: virtio: introduce virtio_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/virtio_config.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 1e306f727edc..170947eb11b5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) { - return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) { - return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); } static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) { - return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) { - return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); } static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) { - return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { - return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } /* Config space accessors. */ -- cgit v1.2.3 From 5da7b160b36a42f161980c5e20d3960d6d076fb8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:58 +0200 Subject: vringh: introduce vringh_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/vringh.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index a3fa537e717a..3ed62efc2bc5 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -226,33 +226,38 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline bool vringh_is_little_endian(const struct vringh *vrh) +{ + return vrh->little_endian; +} + static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) { - return __virtio16_to_cpu(vrh->little_endian, val); + return __virtio16_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) { - return __cpu_to_virtio16(vrh->little_endian, val); + return __cpu_to_virtio16(vringh_is_little_endian(vrh), val); } static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) { - return __virtio32_to_cpu(vrh->little_endian, val); + return __virtio32_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) { - return __cpu_to_virtio32(vrh->little_endian, val); + return __cpu_to_virtio32(vringh_is_little_endian(vrh), val); } static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) { - return __virtio64_to_cpu(vrh->little_endian, val); + return __virtio64_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { - return __cpu_to_virtio64(vrh->little_endian, val); + return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } #endif /* _LINUX_VRINGH_H */ -- cgit v1.2.3 From 7d82410950aa74adccf035c332e409af2bb93e92 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:26:24 +0200 Subject: virtio: add explicit big-endian support to memory accessors The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/virtio_byteorder.h | 24 ++++++++++++++---------- include/linux/virtio_config.h | 3 ++- include/linux/vringh.h | 3 ++- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h index 51865d05b267..ce63a2c3a612 100644 --- a/include/linux/virtio_byteorder.h +++ b/include/linux/virtio_byteorder.h @@ -3,17 +3,21 @@ #include #include -/* - * Low-level memory accessors for handling virtio in modern little endian and in - * compatibility native endian format. - */ +static inline bool virtio_legacy_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) { if (little_endian) return le16_to_cpu((__force __le16)val); else - return (__force u16)val; + return be16_to_cpu((__force __be16)val); } static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) if (little_endian) return (__force __virtio16)cpu_to_le16(val); else - return (__force __virtio16)val; + return (__force __virtio16)cpu_to_be16(val); } static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) if (little_endian) return le32_to_cpu((__force __le32)val); else - return (__force u32)val; + return be32_to_cpu((__force __be32)val); } static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) if (little_endian) return (__force __virtio32)cpu_to_le32(val); else - return (__force __virtio32)val; + return (__force __virtio32)cpu_to_be32(val); } static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) if (little_endian) return le64_to_cpu((__force __le64)val); else - return (__force u64)val; + return be64_to_cpu((__force __be64)val); } static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) if (little_endian) return (__force __virtio64)cpu_to_le64(val); else - return (__force __virtio64)val; + return (__force __virtio64)cpu_to_be64(val); } #endif /* _LINUX_VIRTIO_BYTEORDER */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 170947eb11b5..e5ce8ab0b8b0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -207,7 +207,8 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) static inline bool virtio_is_little_endian(struct virtio_device *vdev) { - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 3ed62efc2bc5..bc6c28d04263 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -228,7 +228,8 @@ static inline void vringh_notify(struct vringh *vrh) static inline bool vringh_is_little_endian(const struct vringh *vrh) { - return vrh->little_endian; + return vrh->little_endian || + virtio_legacy_is_little_endian(); } static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) -- cgit v1.2.3 From 4af34b572a85c44c55491a10693535a79627c478 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 1 Jun 2015 11:04:26 +0200 Subject: drivers: soc: sunxi: Introduce SoC driver to map SRAMs The Allwinner SoCs have a handful of SRAM that can be either mapped to be accessible by devices or the CPU. That mapping is controlled by an SRAM controller, and that mapping might not be set by the bootloader, for example if the device wasn't used at all, or if we're using solutions like the U-Boot's Falcon Boot. We could also imagine changing this at runtime for example to change the mapping of these SRAMs to use them for suspend/resume or runtime memory rate change, if that ever happens. These use cases require some API in the kernel to control that mapping, exported through a drivers/soc driver. This driver also implement a debugfs file that shows the SRAM found in the system, the current mapping and the SRAM that have been claimed by some drivers in the kernel. Signed-off-by: Maxime Ripard Acked-by: Arnd Bergmann Acked-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Arnd Bergmann --- include/linux/soc/sunxi/sunxi_sram.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/soc/sunxi/sunxi_sram.h (limited to 'include/linux') diff --git a/include/linux/soc/sunxi/sunxi_sram.h b/include/linux/soc/sunxi/sunxi_sram.h new file mode 100644 index 000000000000..c5f663bba9c2 --- /dev/null +++ b/include/linux/soc/sunxi/sunxi_sram.h @@ -0,0 +1,19 @@ +/* + * Allwinner SoCs SRAM Controller Driver + * + * Copyright (C) 2015 Maxime Ripard + * + * Author: Maxime Ripard + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef _SUNXI_SRAM_H_ +#define _SUNXI_SRAM_H_ + +int sunxi_sram_claim(struct device *dev); +int sunxi_sram_release(struct device *dev); + +#endif /* _SUNXI_SRAM_H_ */ -- cgit v1.2.3 From f26cdc8536ad50fb802a0445f836b4f94ca09ae7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 09:29:53 -0600 Subject: blk-mq: Shared tag enhancements Storage controllers may expose multiple block devices that share hardware resources managed by blk-mq. This patch enhances the shared tags so a low-level driver can access the shared resources not tied to the unshared h/w contexts. This way the LLD can dynamically add and delete disks and request queues without having to track all the request_queue hctx's to iterate outstanding tags. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2056a99b92f8..37d1602c4f7a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); +typedef void (busy_tag_iter_fn)(struct request *, void *, bool); struct blk_mq_ops { /* @@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); +struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, @@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv); +void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); -- cgit v1.2.3 From bdef7de4b8d9be4cf7bf5aea977f827310ab3ff0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Jun 2015 14:56:09 -0700 Subject: net: Add priority to packet_offload objects. When we scan a packet for GRO processing, we want to see the most common packet types in the front of the offload_base list. So add a priority field so we can handle this properly. IPv4/IPv6 get the highest priority with the implicit zero priority field. Next comes ethernet with a priority of 10, and then we have the MPLS types with a priority of 15. Suggested-by: Eric Dumazet Suggested-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 51f8d2f5dc3f..6f5f71ff5169 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1997,6 +1997,7 @@ struct offload_callbacks { struct packet_offload { __be16 type; /* This is really htons(ether_type). */ + u16 priority; struct offload_callbacks callbacks; struct list_head list; }; -- cgit v1.2.3 From 66e5133f19e901a044fa5eaeeb6ecff4545839e5 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 1 Jun 2015 21:55:06 +0900 Subject: vlan: Add GRO support for non hardware accelerated vlan Currently packets with non-hardware-accelerated vlan cannot be handled by GRO. This causes low performance for 802.1ad and stacked vlan, as their vlan tags are currently not stripped by hardware. This patch adds GRO support for non-hardware-accelerated vlan and improves receive performance of them. Test Environment: vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599) Result: - Before $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 5233.17 Rx side CPU usage: %usr %sys %irq %soft %idle 0.27 58.03 0.00 41.70 0.00 - After $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 7586.85 Rx side CPU usage: %usr %sys %irq %soft %idle 0.50 25.83 0.00 59.53 14.14 [ Register VLAN offloads with priority 10 -DaveM ] Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a40d29846ac2..67ce5bd3b56a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -628,4 +628,24 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, return features; } +/** + * compare_vlan_header - Compare two vlan headers + * @h1: Pointer to vlan header + * @h2: Pointer to vlan header + * + * Compare two vlan headers, returns 0 if equal. + * + * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. + */ +static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, + const struct vlan_hdr *h2) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return *(u32 *)h1 ^ *(u32 *)h2; +#else + return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) | + ((__force u32)h1->h_vlan_encapsulated_proto ^ + (__force u32)h2->h_vlan_encapsulated_proto); +#endif +} #endif /* !(_LINUX_IF_VLAN_H_) */ -- cgit v1.2.3 From 3434d23b694e5cb6e44e966914563406c31c4053 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 May 2015 13:33:45 +0530 Subject: clockevents: Add helpers to check the state of a clockevent device Some clockevent drivers, once migrated to use per-state callbacks, need to check the state of the clockevent device in their callbacks or interrupt handler. Add accessor functions clockevent_state_*() to get this information. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/04a717d490335c688dd7af899fbcede97e1bb8ee.1432192527.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 271fa4c8eb29..64214ad85af9 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -149,6 +149,32 @@ struct clock_event_device { struct module *owner; } ____cacheline_aligned; +/* Helpers to verify state of a clockevent device */ +static inline bool clockevent_state_detached(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_DETACHED; +} + +static inline bool clockevent_state_shutdown(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_SHUTDOWN; +} + +static inline bool clockevent_state_periodic(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_PERIODIC; +} + +static inline bool clockevent_state_oneshot(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_ONESHOT; +} + +static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) +{ + return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED; +} + /* * Calculate a multiplication factor for scaled math, which is used to convert * nanoseconds based values to clock ticks: -- cgit v1.2.3 From a97e2d86a9b88ea9e9a280b594b80f0eec2c955b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 31 May 2015 17:15:30 -0400 Subject: IB/core cleanup: Add const on args - device->process_mad The process_mad device function declares some parameters as "in". Make those parameters const and adjust the call tree under process_mad in the various drivers accordingly. Signed-off-by: Ira Weiny Reviewed-by: Hal Rosenstock Reviewed-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a90e7523dc2..9ec7c93d6fa3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -696,7 +696,7 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 11f81becca04bb7d2826a9b65bb8d27b0a1bb543 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:15 -0400 Subject: page_writeback: revive cancel_dirty_page() in a restricted form cancel_dirty_page() had some issues and b9ea25152e56 ("page_writeback: clean up mess around cancel_dirty_page()") replaced it with account_page_cleaned() which makes the caller responsible for clearing the dirty bit; unfortunately, the planned changes for cgroup writeback support requires synchronization between dirty bit manipulation and stat updates. While we can open-code such synchronization in each account_page_cleaned() callsite, that's gonna be unnecessarily awkward and verbose. This patch revives cancel_dirty_page() but in a more restricted form. All it does is TestClearPageDirty() followed by account_page_cleaned() invocation if the page was dirty. This helper covers all account_page_cleaned() usages except for __delete_from_page_cache() which is a special case anyway and left alone. As this leaves no module user for account_page_cleaned(), EXPORT_SYMBOL() is dropped from it. This patch just revives cancel_dirty_page() as a trivial wrapper to replace equivalent usages and doesn't introduce any functional changes. Signed-off-by: Tejun Heo Cc: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0755b9fd03a7..a83cf3a6f78e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1215,6 +1215,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping); void account_page_cleaned(struct page *page, struct address_space *mapping); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); +void cancel_dirty_page(struct page *page); int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -- cgit v1.2.3 From c4843a7593a9df3ff5b1806084cefdfa81dd7c79 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 22 May 2015 17:13:16 -0400 Subject: memcg: add per cgroup dirty page accounting When modifying PG_Dirty on cached file pages, update the new MEM_CGROUP_STAT_DIRTY counter. This is done in the same places where global NR_FILE_DIRTY is managed. The new memcg stat is visible in the per memcg memory.stat cgroupfs file. The most recent past attempt at this was http://thread.gmane.org/gmane.linux.kernel.cgroups/8632 The new accounting supports future efforts to add per cgroup dirty page throttling and writeback. It also helps an administrator break down a container's memory usage and provides evidence to understand memcg oom kills (the new dirty count is included in memcg oom kill messages). The ability to move page accounting between memcg (memory.move_charge_at_immigrate) makes this accounting more complicated than the global counter. The existing mem_cgroup_{begin,end}_page_stat() lock is used to serialize move accounting with stat updates. Typical update operation: memcg = mem_cgroup_begin_page_stat(page) if (TestSetPageDirty()) { [...] mem_cgroup_update_page_stat(memcg) } mem_cgroup_end_page_stat(memcg) Summary of mem_cgroup_end_page_stat() overhead: - Without CONFIG_MEMCG it's a no-op - With CONFIG_MEMCG and no inter memcg task movement, it's just rcu_read_lock() - With CONFIG_MEMCG and inter memcg task movement, it's rcu_read_lock() + spin_lock_irqsave() A memcg parameter is added to several routines because their callers now grab mem_cgroup_begin_page_stat() which returns the memcg later needed by for mem_cgroup_update_page_stat(). Because mem_cgroup_begin_page_stat() may disable interrupts, some adjustments are needed: - move __mark_inode_dirty() from __set_page_dirty() to its caller. __mark_inode_dirty() locking does not want interrupts disabled. - use spin_lock_irqsave(tree_lock) rather than spin_lock_irq() in __delete_from_page_cache(), replace_page_cache_page(), invalidate_complete_page2(), and __remove_mapping(). text data bss dec hex filename 8925147 1774832 1785856 12485835 be84cb vmlinux-!CONFIG_MEMCG-before 8925339 1774832 1785856 12486027 be858b vmlinux-!CONFIG_MEMCG-after +192 text bytes 8965977 1784992 1785856 12536825 bf4bf9 vmlinux-CONFIG_MEMCG-before 8966750 1784992 1785856 12537598 bf4efe vmlinux-CONFIG_MEMCG-after +773 text bytes Performance tests run on v4.0-rc1-36-g4f671fe2f952. Lower is better for all metrics, they're all wall clock or cycle counts. The read and write fault benchmarks just measure fault time, they do not include I/O time. * CONFIG_MEMCG not set: baseline patched kbuild 1m25.030000(+-0.088% 3 samples) 1m25.426667(+-0.120% 3 samples) dd write 100 MiB 0.859211561 +-15.10% 0.874162885 +-15.03% dd write 200 MiB 1.670653105 +-17.87% 1.669384764 +-11.99% dd write 1000 MiB 8.434691190 +-14.15% 8.474733215 +-14.77% read fault cycles 254.0(+-0.000% 10 samples) 253.0(+-0.000% 10 samples) write fault cycles 2021.2(+-3.070% 10 samples) 1984.5(+-1.036% 10 samples) * CONFIG_MEMCG=y root_memcg: baseline patched kbuild 1m25.716667(+-0.105% 3 samples) 1m25.686667(+-0.153% 3 samples) dd write 100 MiB 0.855650830 +-14.90% 0.887557919 +-14.90% dd write 200 MiB 1.688322953 +-12.72% 1.667682724 +-13.33% dd write 1000 MiB 8.418601605 +-14.30% 8.673532299 +-15.00% read fault cycles 266.0(+-0.000% 10 samples) 266.0(+-0.000% 10 samples) write fault cycles 2051.7(+-1.349% 10 samples) 2049.6(+-1.686% 10 samples) * CONFIG_MEMCG=y non-root_memcg: baseline patched kbuild 1m26.120000(+-0.273% 3 samples) 1m25.763333(+-0.127% 3 samples) dd write 100 MiB 0.861723964 +-15.25% 0.818129350 +-14.82% dd write 200 MiB 1.669887569 +-13.30% 1.698645885 +-13.27% dd write 1000 MiB 8.383191730 +-14.65% 8.351742280 +-14.52% read fault cycles 265.7(+-0.172% 10 samples) 267.0(+-0.000% 10 samples) write fault cycles 2070.6(+-1.512% 10 samples) 2084.4(+-2.148% 10 samples) As expected anon page faults are not affected by this patch. tj: Updated to apply on top of the recent cancel_dirty_page() changes. Signed-off-by: Sha Zhengju Signed-off-by: Greg Thelen Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 1 + include/linux/mm.h | 6 ++++-- include/linux/pagemap.h | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 72dff5fb0d0c..5fe6411b5e54 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -41,6 +41,7 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_DIRTY, /* # of dirty pages in page cache */ MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, diff --git a/include/linux/mm.h b/include/linux/mm.h index a83cf3a6f78e..f48d979ced4b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1211,8 +1211,10 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_dirtied(struct page *page, struct address_space *mapping); -void account_page_cleaned(struct page *page, struct address_space *mapping); +void account_page_dirtied(struct page *page, struct address_space *mapping, + struct mem_cgroup *memcg); +void account_page_cleaned(struct page *page, struct address_space *mapping, + struct mem_cgroup *memcg); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void cancel_dirty_page(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3736f7065c..fb0814ca65c7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -651,7 +651,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow); +extern void __delete_from_page_cache(struct page *page, void *shadow, + struct mem_cgroup *memcg); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); /* -- cgit v1.2.3 From eea8f41cc58849e354ecf8b95bd7f806e1d1f703 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:17 -0400 Subject: blkcg: move block/blk-cgroup.h to include/linux/blk-cgroup.h cgroup aware writeback support will require exposing some of blkcg details. In preprataion, move block/blk-cgroup.h to include/linux/blk-cgroup.h. This patch is pure file move. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 603 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 603 insertions(+) create mode 100644 include/linux/blk-cgroup.h (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h new file mode 100644 index 000000000000..c567865b5f1d --- /dev/null +++ b/include/linux/blk-cgroup.h @@ -0,0 +1,603 @@ +#ifndef _BLK_CGROUP_H +#define _BLK_CGROUP_H +/* + * Common Block IO controller cgroup interface + * + * Based on ideas and code from CFQ, CFS and BFQ: + * Copyright (C) 2003 Jens Axboe + * + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * + * Copyright (C) 2009 Vivek Goyal + * Nauman Rafique + */ + +#include +#include +#include +#include +#include +#include + +/* Max limits for throttle policy */ +#define THROTL_IOPS_MAX UINT_MAX + +/* CFQ specific, out here for blkcg->cfq_weight */ +#define CFQ_WEIGHT_MIN 10 +#define CFQ_WEIGHT_MAX 1000 +#define CFQ_WEIGHT_DEFAULT 500 + +#ifdef CONFIG_BLK_CGROUP + +enum blkg_rwstat_type { + BLKG_RWSTAT_READ, + BLKG_RWSTAT_WRITE, + BLKG_RWSTAT_SYNC, + BLKG_RWSTAT_ASYNC, + + BLKG_RWSTAT_NR, + BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, +}; + +struct blkcg_gq; + +struct blkcg { + struct cgroup_subsys_state css; + spinlock_t lock; + + struct radix_tree_root blkg_tree; + struct blkcg_gq *blkg_hint; + struct hlist_head blkg_list; + + /* TODO: per-policy storage in blkcg */ + unsigned int cfq_weight; /* belongs to cfq */ + unsigned int cfq_leaf_weight; +}; + +struct blkg_stat { + struct u64_stats_sync syncp; + uint64_t cnt; +}; + +struct blkg_rwstat { + struct u64_stats_sync syncp; + uint64_t cnt[BLKG_RWSTAT_NR]; +}; + +/* + * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a + * request_queue (q). This is used by blkcg policies which need to track + * information per blkcg - q pair. + * + * There can be multiple active blkcg policies and each has its private + * data on each blkg, the size of which is determined by + * blkcg_policy->pd_size. blkcg core allocates and frees such areas + * together with blkg and invokes pd_init/exit_fn() methods. + * + * Such private data must embed struct blkg_policy_data (pd) at the + * beginning and pd_size can't be smaller than pd. + */ +struct blkg_policy_data { + /* the blkg and policy id this per-policy data belongs to */ + struct blkcg_gq *blkg; + int plid; + + /* used during policy activation */ + struct list_head alloc_node; +}; + +/* association between a blk cgroup and a request queue */ +struct blkcg_gq { + /* Pointer to the associated request_queue */ + struct request_queue *q; + struct list_head q_node; + struct hlist_node blkcg_node; + struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + + /* request allocation list for this blkcg-q pair */ + struct request_list rl; + + /* reference count */ + atomic_t refcnt; + + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; + + struct rcu_head rcu_head; +}; + +typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); + +struct blkcg_policy { + int plid; + /* policy specific private data size */ + size_t pd_size; + /* cgroup files for the policy */ + struct cftype *cftypes; + + /* operations */ + blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; + blkcg_pol_exit_pd_fn *pd_exit_fn; + blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; +}; + +extern struct blkcg blkcg_root; + +struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); +struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); +int blkcg_init_queue(struct request_queue *q); +void blkcg_drain_queue(struct request_queue *q); +void blkcg_exit_queue(struct request_queue *q); + +/* Blkio controller policy registration */ +int blkcg_policy_register(struct blkcg_policy *pol); +void blkcg_policy_unregister(struct blkcg_policy *pol); +int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol); +void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol); + +void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, + u64 (*prfill)(struct seq_file *, + struct blkg_policy_data *, int), + const struct blkcg_policy *pol, int data, + bool show_total); +u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); +u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + const struct blkg_rwstat *rwstat); +u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, + int off); + +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off); + +struct blkg_conf_ctx { + struct gendisk *disk; + struct blkcg_gq *blkg; + u64 v; +}; + +int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + const char *input, struct blkg_conf_ctx *ctx); +void blkg_conf_finish(struct blkg_conf_ctx *ctx); + + +static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct blkcg, css) : NULL; +} + +static inline struct blkcg *task_blkcg(struct task_struct *tsk) +{ + return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); +} + +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return task_blkcg(current); +} + +/** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + return css_to_blkcg(blkcg->css.parent); +} + +/** + * blkg_to_pdata - get policy private data + * @blkg: blkg of interest + * @pol: policy of interest + * + * Return pointer to private data associated with the @blkg-@pol pair. + */ +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) +{ + return blkg ? blkg->pd[pol->plid] : NULL; +} + +/** + * pdata_to_blkg - get blkg associated with policy private data + * @pd: policy private data of interest + * + * @pd is policy private data. Determine the blkg it's associated with. + */ +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) +{ + return pd ? pd->blkg : NULL; +} + +/** + * blkg_path - format cgroup path of blkg + * @blkg: blkg of interest + * @buf: target buffer + * @buflen: target buffer length + * + * Format the path of the cgroup of @blkg into @buf. + */ +static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) +{ + char *p; + + p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); + if (!p) { + strncpy(buf, "", buflen); + return -ENAMETOOLONG; + } + + memmove(buf, p, buf + buflen - p); + return 0; +} + +/** + * blkg_get - get a blkg reference + * @blkg: blkg to get + * + * The caller should be holding an existing reference. + */ +static inline void blkg_get(struct blkcg_gq *blkg) +{ + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + atomic_inc(&blkg->refcnt); +} + +void __blkg_release_rcu(struct rcu_head *rcu); + +/** + * blkg_put - put a blkg reference + * @blkg: blkg to put + */ +static inline void blkg_put(struct blkcg_gq *blkg) +{ + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + if (atomic_dec_and_test(&blkg->refcnt)) + call_rcu(&blkg->rcu_head, __blkg_release_rcu); +} + +struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, + bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_css by calling css_rightmost_descendant() to skip subtree. + * @p_blkg is included in the iteration and the first node to be visited. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +/** + * blkg_for_each_descendant_post - post-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_css: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Similar to blkg_for_each_descendant_pre() but performs post-order + * traversal instead. Synchronization rules are the same. @p_blkg is + * included in the iteration and the last node to be visited. + */ +#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ + css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ + if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ + (p_blkg)->q, false))) + +/** + * blk_get_rl - get request_list to use + * @q: request_queue of interest + * @bio: bio which will be attached to the allocated request (may be %NULL) + * + * The caller wants to allocate a request from @q to use for @bio. Find + * the request_list to use and obtain a reference on it. Should be called + * under queue_lock. This function is guaranteed to return non-%NULL + * request_list. + */ +static inline struct request_list *blk_get_rl(struct request_queue *q, + struct bio *bio) +{ + struct blkcg *blkcg; + struct blkcg_gq *blkg; + + rcu_read_lock(); + + blkcg = bio_blkcg(bio); + + /* bypass blkg lookup and use @q->root_rl directly for root */ + if (blkcg == &blkcg_root) + goto root_rl; + + /* + * Try to use blkg->rl. blkg lookup may fail under memory pressure + * or if either the blkcg or queue is going away. Fall back to + * root_rl in such cases. + */ + blkg = blkg_lookup_create(blkcg, q); + if (unlikely(IS_ERR(blkg))) + goto root_rl; + + blkg_get(blkg); + rcu_read_unlock(); + return &blkg->rl; +root_rl: + rcu_read_unlock(); + return &q->root_rl; +} + +/** + * blk_put_rl - put request_list + * @rl: request_list to put + * + * Put the reference acquired by blk_get_rl(). Should be called under + * queue_lock. + */ +static inline void blk_put_rl(struct request_list *rl) +{ + /* root_rl may not have blkg set */ + if (rl->blkg && rl->blkg->blkcg != &blkcg_root) + blkg_put(rl->blkg); +} + +/** + * blk_rq_set_rl - associate a request with a request_list + * @rq: request of interest + * @rl: target request_list + * + * Associate @rq with @rl so that accounting and freeing can know the + * request_list @rq came from. + */ +static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) +{ + rq->rl = rl; +} + +/** + * blk_rq_rl - return the request_list a request came from + * @rq: request of interest + * + * Return the request_list @rq is allocated from. + */ +static inline struct request_list *blk_rq_rl(struct request *rq) +{ + return rq->rl; +} + +struct request_list *__blk_queue_next_rl(struct request_list *rl, + struct request_queue *q); +/** + * blk_queue_for_each_rl - iterate through all request_lists of a request_queue + * + * Should be used under queue_lock. + */ +#define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) + +static inline void blkg_stat_init(struct blkg_stat *stat) +{ + u64_stats_init(&stat->syncp); +} + +/** + * blkg_stat_add - add a value to a blkg_stat + * @stat: target blkg_stat + * @val: value to add + * + * Add @val to @stat. The caller is responsible for synchronizing calls to + * this function. + */ +static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) +{ + u64_stats_update_begin(&stat->syncp); + stat->cnt += val; + u64_stats_update_end(&stat->syncp); +} + +/** + * blkg_stat_read - read the current value of a blkg_stat + * @stat: blkg_stat to read + * + * Read the current value of @stat. This function can be called without + * synchroniztion and takes care of u64 atomicity. + */ +static inline uint64_t blkg_stat_read(struct blkg_stat *stat) +{ + unsigned int start; + uint64_t v; + + do { + start = u64_stats_fetch_begin_irq(&stat->syncp); + v = stat->cnt; + } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); + + return v; +} + +/** + * blkg_stat_reset - reset a blkg_stat + * @stat: blkg_stat to reset + */ +static inline void blkg_stat_reset(struct blkg_stat *stat) +{ + stat->cnt = 0; +} + +/** + * blkg_stat_merge - merge a blkg_stat into another + * @to: the destination blkg_stat + * @from: the source + * + * Add @from's count to @to. + */ +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +{ + blkg_stat_add(to, blkg_stat_read(from)); +} + +static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) +{ + u64_stats_init(&rwstat->syncp); +} + +/** + * blkg_rwstat_add - add a value to a blkg_rwstat + * @rwstat: target blkg_rwstat + * @rw: mask of REQ_{WRITE|SYNC} + * @val: value to add + * + * Add @val to @rwstat. The counters are chosen according to @rw. The + * caller is responsible for synchronizing calls to this function. + */ +static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, + int rw, uint64_t val) +{ + u64_stats_update_begin(&rwstat->syncp); + + if (rw & REQ_WRITE) + rwstat->cnt[BLKG_RWSTAT_WRITE] += val; + else + rwstat->cnt[BLKG_RWSTAT_READ] += val; + if (rw & REQ_SYNC) + rwstat->cnt[BLKG_RWSTAT_SYNC] += val; + else + rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; + + u64_stats_update_end(&rwstat->syncp); +} + +/** + * blkg_rwstat_read - read the current values of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Read the current snapshot of @rwstat and return it as the return value. + * This function can be called without synchronization and takes care of + * u64 atomicity. + */ +static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +{ + unsigned int start; + struct blkg_rwstat tmp; + + do { + start = u64_stats_fetch_begin_irq(&rwstat->syncp); + tmp = *rwstat; + } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); + + return tmp; +} + +/** + * blkg_rwstat_total - read the total count of a blkg_rwstat + * @rwstat: blkg_rwstat to read + * + * Return the total count of @rwstat regardless of the IO direction. This + * function can be called without synchronization and takes care of u64 + * atomicity. + */ +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) +{ + struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; +} + +/** + * blkg_rwstat_reset - reset a blkg_rwstat + * @rwstat: blkg_rwstat to reset + */ +static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) +{ + memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); +} + +/** + * blkg_rwstat_merge - merge a blkg_rwstat into another + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's counts to @to. + */ +static inline void blkg_rwstat_merge(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + struct blkg_rwstat v = blkg_rwstat_read(from); + int i; + + u64_stats_update_begin(&to->syncp); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + to->cnt[i] += v.cnt[i]; + u64_stats_update_end(&to->syncp); +} + +#else /* CONFIG_BLK_CGROUP */ + +struct cgroup; +struct blkcg; + +struct blkg_policy_data { +}; + +struct blkcg_gq { +}; + +struct blkcg_policy { +}; + +static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } +static inline int blkcg_init_queue(struct request_queue *q) { return 0; } +static inline void blkcg_drain_queue(struct request_queue *q) { } +static inline void blkcg_exit_queue(struct request_queue *q) { } +static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } +static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } +static inline int blkcg_activate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { return 0; } +static inline void blkcg_deactivate_policy(struct request_queue *q, + const struct blkcg_policy *pol) { } + +static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } + +static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, + struct blkcg_policy *pol) { return NULL; } +static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } +static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } +static inline void blkg_get(struct blkcg_gq *blkg) { } +static inline void blkg_put(struct blkcg_gq *blkg) { } + +static inline struct request_list *blk_get_rl(struct request_queue *q, + struct bio *bio) { return &q->root_rl; } +static inline void blk_put_rl(struct request_list *rl) { } +static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } +static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } + +#define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) + +#endif /* CONFIG_BLK_CGROUP */ +#endif /* _BLK_CGROUP_H */ -- cgit v1.2.3 From efa7d1c733d1d2c1a468b85126d70bad9fdf6ba8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:18 -0400 Subject: update !CONFIG_BLK_CGROUP dummies in include/linux/blk-cgroup.h The header file will be used more widely with the pending cgroup writeback support and the current set of dummy declarations aren't enough to handle different config combinations. Update as follows. * Drop the struct cgroup declaration. None of the dummy defs need it. * Define blkcg as an empty struct instead of just declaring it. * Wrap dummy function defs in CONFIG_BLOCK. Some functions use block data types and none of them are to be used w/o block enabled. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c567865b5f1d..51f95b34d3f0 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -558,8 +558,8 @@ static inline void blkg_rwstat_merge(struct blkg_rwstat *to, #else /* CONFIG_BLK_CGROUP */ -struct cgroup; -struct blkcg; +struct blkcg { +}; struct blkg_policy_data { }; @@ -570,6 +570,8 @@ struct blkcg_gq { struct blkcg_policy { }; +#ifdef CONFIG_BLOCK + static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } @@ -599,5 +601,6 @@ static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q #define blk_queue_for_each_rl(rl, q) \ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) +#endif /* CONFIG_BLOCK */ #endif /* CONFIG_BLK_CGROUP */ #endif /* _BLK_CGROUP_H */ -- cgit v1.2.3 From 56161634e4824380a67243a4cf3fa52eb1e5d836 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:20 -0400 Subject: memcg: add mem_cgroup_root_css Add global mem_cgroup_root_css which points to the root memcg css. This will be used by cgroup writeback support. If memcg is disabled, it's defined as ERR_PTR(-EINVAL). Signed-off-by: Tejun Heo Cc: Johannes Weiner aCc: Michal Hocko Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5fe6411b5e54..294498f4f6fc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -68,6 +68,8 @@ enum mem_cgroup_events_index { }; #ifdef CONFIG_MEMCG +extern struct cgroup_subsys_state *mem_cgroup_root_css; + void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr); @@ -196,6 +198,8 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; +#define mem_cgroup_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) + static inline void mem_cgroup_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx, unsigned int nr) -- cgit v1.2.3 From 496d5e7560dbb84399dbd92316fc33857aa83900 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:21 -0400 Subject: blkcg: add blkcg_root_css Add global constant blkcg_root_css which points to &blkcg_root.css. This will be used by cgroup writeback support. If blkcg is disabled, it's defined as ERR_PTR(-EINVAL). v2: The declarations moved to include/linux/blk-cgroup.h as suggested by Vivek. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 51f95b34d3f0..65f0c178fd04 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -134,6 +134,7 @@ struct blkcg_policy { }; extern struct blkcg blkcg_root; +extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, @@ -570,6 +571,8 @@ struct blkcg_gq { struct blkcg_policy { }; +#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) + #ifdef CONFIG_BLOCK static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -- cgit v1.2.3 From ec438699a9ae0856c2ce20a50dd39cdc7e92a732 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:22 -0400 Subject: cgroup, block: implement task_get_css() and use it in bio_associate_current() bio_associate_current() currently open codes task_css() and css_tryget_online() to find and pin $current's blkcg css. Abstract it into task_get_css() which is implemented from cgroup side. As a task is always associated with an online css for every subsystem except while the css_set update is propagating, task_get_css() retries till css_tryget_online() succeeds. This is a cleanup and shouldn't lead to noticeable behavior changes. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Jens Axboe Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b9cb94c3102a..e7da0aa65b2d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -773,6 +773,31 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, return task_css_check(task, subsys_id, false); } +/** + * task_get_css - find and get the css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * Find the css for the (@task, @subsys_id) combination, increment a + * reference on and return it. This function is guaranteed to return a + * valid css. + */ +static inline struct cgroup_subsys_state * +task_get_css(struct task_struct *task, int subsys_id) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + while (true) { + css = task_css(task, subsys_id); + if (likely(css_tryget_online(css))) + break; + cpu_relax(); + } + rcu_read_unlock(); + return css; +} + /** * task_css_is_root - test whether a task belongs to the root css * @task: the target task -- cgit v1.2.3 From fd383c2d3cae146337cea809de0d622b8b887e6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:23 -0400 Subject: blkcg: implement task_get_blkcg_css() Implement a wrapper around task_get_css() to acquire the blkcg css for a given task. The wrapper is necessary for cgroup writeback support as there will be places outside blkcg proper trying to acquire blkcg_css and blkio_cgrp_id will be undefined when !CONFIG_BLK_CGROUP. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 65f0c178fd04..4dc643f2046e 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -195,6 +195,12 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return task_blkcg(current); } +static inline struct cgroup_subsys_state * +task_get_blkcg_css(struct task_struct *task) +{ + return task_get_css(task, blkio_cgrp_id); +} + /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -573,6 +579,12 @@ struct blkcg_policy { #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) +static inline struct cgroup_subsys_state * +task_get_blkcg_css(struct task_struct *task) +{ + return NULL; +} + #ifdef CONFIG_BLOCK static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -- cgit v1.2.3 From 1d933cf096e3aea15f1aec8297657b7a846fab63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:24 -0400 Subject: blkcg: implement bio_associate_blkcg() Currently, a bio can only be associated with the io_context and blkcg of %current using bio_associate_current(). This is too restrictive for cgroup writeback support. Implement bio_associate_blkcg() which associates a bio with the specified blkcg. bio_associate_blkcg() leaves the io_context unassociated. bio_associate_current() is updated so that it considers a bio as already associated if it has a blkcg_css, instead of an io_context, associated with it. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f0291cf64cc5..5e963a6d7c14 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -482,9 +482,12 @@ extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); #ifdef CONFIG_BLK_CGROUP +int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_current(struct bio *bio); void bio_disassociate_task(struct bio *bio); #else /* CONFIG_BLK_CGROUP */ +static inline int bio_associate_blkcg(struct bio *bio, + struct cgroup_subsys_state *blkcg_css) { return 0; } static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } #endif /* CONFIG_BLK_CGROUP */ -- cgit v1.2.3 From ad7fa852d3d2816d68a138ebc5bc8967aeb7fd86 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 May 2015 20:00:02 -0400 Subject: memcg: implement mem_cgroup_css_from_page() Implement mem_cgroup_css_from_page() which returns the cgroup_subsys_state of the memcg associated with a given page on the default hierarchy. This will be used by cgroup writeback support. This function assumes that page->mem_cgroup association doesn't change until the page is released, which is true on the default hierarchy as long as replace_page_cache_page() is not used. As the only user of replace_page_cache_page() is FUSE which won't support cgroup writeback for the time being, this works for now, and replace_page_cache_page() will soon be updated so that the invariant actually holds. Note that the RCU protected page->mem_cgroup access is consistent with other usages across memcg but ultimately incorrect. These unlocked accesses are missing required barriers. page->mem_cgroup should be made an RCU pointer and updated and accessed using RCU operations. v4: Instead of triggering WARN, return the root css on the traditional hierarchies. This makes the function a lot easier to deal with especially as there's no light way to synchronize against hierarchy rebinding. v3: s/mem_cgroup_migrate()/mem_cgroup_css_from_page()/ v2: Trigger WARN if the function is used on the traditional hierarchies and add comment about the assumed invariant. Signed-off-by: Tejun Heo Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 294498f4f6fc..637ef626008e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -115,6 +115,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, } extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); +extern struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *, -- cgit v1.2.3 From 4452226ea276e74fc3e252c88d9bb7e8f8e44bf0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:26 -0400 Subject: writeback: move backing_dev_info->state into bdi_writeback Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback) and the role of the separation is unclear. For cgroup support for writeback IOs, a bdi will be updated to host multiple wb's where each wb serves writeback IOs of a different cgroup on the bdi. To achieve that, a wb should carry all states necessary for servicing writeback IOs for a cgroup independently. This patch moves bdi->state into wb. * enum bdi_state is renamed to wb_state and the prefix of all enums is changed from BDI_ to WB_. * Explicit zeroing of bdi->state is removed without adding zeoring of wb->state as the whole data structure is zeroed on init anyway. * As there's still only one bdi_writeback per backing_dev_info, all uses of bdi->state are mechanically replaced with bdi->wb.state introducing no behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Wu Fengguang Cc: drbd-dev@lists.linbit.com Cc: Neil Brown Cc: Alasdair Kergon Cc: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aff923ae8c4b..eb14f988a63e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -25,13 +25,13 @@ struct device; struct dentry; /* - * Bits in backing_dev_info.state + * Bits in bdi_writeback.state */ -enum bdi_state { - BDI_async_congested, /* The async (write) queue is getting full */ - BDI_sync_congested, /* The sync queue is getting full */ - BDI_registered, /* bdi_register() was done */ - BDI_writeback_running, /* Writeback is in progress */ +enum wb_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ + WB_registered, /* bdi_register() was done */ + WB_writeback_running, /* Writeback is in progress */ }; typedef int (congested_fn)(void *, int); @@ -49,6 +49,7 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ + unsigned long state; /* Always use atomic bitops on this */ unsigned long last_old_flush; /* last old data flush */ struct delayed_work dwork; /* work item used for writeback */ @@ -62,7 +63,6 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ - unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ @@ -250,23 +250,23 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) { if (bdi->congested_fn) return bdi->congested_fn(bdi->congested_data, bdi_bits); - return (bdi->state & bdi_bits); + return (bdi->wb.state & bdi_bits); } static inline int bdi_read_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, 1 << BDI_sync_congested); + return bdi_congested(bdi, 1 << WB_sync_congested); } static inline int bdi_write_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, 1 << BDI_async_congested); + return bdi_congested(bdi, 1 << WB_async_congested); } static inline int bdi_rw_congested(struct backing_dev_info *bdi) { - return bdi_congested(bdi, (1 << BDI_sync_congested) | - (1 << BDI_async_congested)); + return bdi_congested(bdi, (1 << WB_sync_congested) | + (1 << WB_async_congested)); } enum { -- cgit v1.2.3 From 93f78d882865cb90020d0f80a9523c99cf46924c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:27 -0400 Subject: writeback: move backing_dev_info->bdi_stat[] into bdi_writeback Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback) and the role of the separation is unclear. For cgroup support for writeback IOs, a bdi will be updated to host multiple wb's where each wb serves writeback IOs of a different cgroup on the bdi. To achieve that, a wb should carry all states necessary for servicing writeback IOs for a cgroup independently. This patch moves bdi->bdi_stat[] into wb. * enum bdi_stat_item is renamed to wb_stat_item and the prefix of all enums is changed from BDI_ to WB_. * BDI_STAT_BATCH() -> WB_STAT_BATCH() * [__]{add|inc|dec|sum}_wb_stat(bdi, ...) -> [__]{add|inc}_wb_stat(wb, ...) * bdi_stat[_error]() -> wb_stat[_error]() * bdi_writeout_inc() -> wb_writeout_inc() * stat init is moved to bdi_wb_init() and bdi_wb_exit() is added and frees stat. * As there's still only one bdi_writeback per backing_dev_info, all uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[] introducing no behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Wu Fengguang Cc: Miklos Szeredi Cc: Trond Myklebust Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 68 +++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index eb14f988a63e..fe7a907a4e16 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -36,15 +36,15 @@ enum wb_state { typedef int (congested_fn)(void *, int); -enum bdi_stat_item { - BDI_RECLAIMABLE, - BDI_WRITEBACK, - BDI_DIRTIED, - BDI_WRITTEN, - NR_BDI_STAT_ITEMS +enum wb_stat_item { + WB_RECLAIMABLE, + WB_WRITEBACK, + WB_DIRTIED, + WB_WRITTEN, + NR_WB_STAT_ITEMS }; -#define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) +#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ @@ -58,6 +58,8 @@ struct bdi_writeback { struct list_head b_more_io; /* parked for more writeback */ struct list_head b_dirty_time; /* time stamps are dirty */ spinlock_t list_lock; /* protects the b_* lists */ + + struct percpu_counter stat[NR_WB_STAT_ITEMS]; }; struct backing_dev_info { @@ -69,8 +71,6 @@ struct backing_dev_info { char *name; - struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; - unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ @@ -137,78 +137,74 @@ static inline int wb_has_dirty_io(struct bdi_writeback *wb) !list_empty(&wb->b_more_io); } -static inline void __add_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item, s64 amount) +static inline void __add_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item, s64 amount) { - __percpu_counter_add(&bdi->bdi_stat[item], amount, BDI_STAT_BATCH); + __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void __inc_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void __inc_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item) { - __add_bdi_stat(bdi, item, 1); + __add_wb_stat(wb, item, 1); } -static inline void inc_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { unsigned long flags; local_irq_save(flags); - __inc_bdi_stat(bdi, item); + __inc_wb_stat(wb, item); local_irq_restore(flags); } -static inline void __dec_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void __dec_wb_stat(struct bdi_writeback *wb, + enum wb_stat_item item) { - __add_bdi_stat(bdi, item, -1); + __add_wb_stat(wb, item, -1); } -static inline void dec_bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { unsigned long flags; local_irq_save(flags); - __dec_bdi_stat(bdi, item); + __dec_wb_stat(wb, item); local_irq_restore(flags); } -static inline s64 bdi_stat(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - return percpu_counter_read_positive(&bdi->bdi_stat[item]); + return percpu_counter_read_positive(&wb->stat[item]); } -static inline s64 __bdi_stat_sum(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 __wb_stat_sum(struct bdi_writeback *wb, + enum wb_stat_item item) { - return percpu_counter_sum_positive(&bdi->bdi_stat[item]); + return percpu_counter_sum_positive(&wb->stat[item]); } -static inline s64 bdi_stat_sum(struct backing_dev_info *bdi, - enum bdi_stat_item item) +static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { s64 sum; unsigned long flags; local_irq_save(flags); - sum = __bdi_stat_sum(bdi, item); + sum = __wb_stat_sum(wb, item); local_irq_restore(flags); return sum; } -extern void bdi_writeout_inc(struct backing_dev_info *bdi); +extern void wb_writeout_inc(struct bdi_writeback *wb); /* * maximal error of a stat counter. */ -static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) +static inline unsigned long wb_stat_error(struct bdi_writeback *wb) { #ifdef CONFIG_SMP - return nr_cpu_ids * BDI_STAT_BATCH; + return nr_cpu_ids * WB_STAT_BATCH; #else return 1; #endif -- cgit v1.2.3 From a88a341a73be4ef035ca26170c849f002797da27 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:28 -0400 Subject: writeback: move bandwidth related fields from backing_dev_info into bdi_writeback Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback) and the role of the separation is unclear. For cgroup support for writeback IOs, a bdi will be updated to host multiple wb's where each wb serves writeback IOs of a different cgroup on the bdi. To achieve that, a wb should carry all states necessary for servicing writeback IOs for a cgroup independently. This patch moves bandwidth related fields from backing_dev_info into bdi_writeback. * The moved fields are: bw_time_stamp, dirtied_stamp, written_stamp, write_bandwidth, avg_write_bandwidth, dirty_ratelimit, balanced_dirty_ratelimit, completions and dirty_exceeded. * writeback_chunk_size() and over_bground_thresh() now take @wb instead of @bdi. * bdi_writeout_fraction(bdi, ...) -> wb_writeout_fraction(wb, ...) bdi_dirty_limit(bdi, ...) -> wb_dirty_limit(wb, ...) bdi_position_ration(bdi, ...) -> wb_position_ratio(wb, ...) bdi_update_writebandwidth(bdi, ...) -> wb_update_write_bandwidth(wb, ...) [__]bdi_update_bandwidth(bdi, ...) -> [__]wb_update_bandwidth(wb, ...) bdi_{max|min}_pause(bdi, ...) -> wb_{max|min}_pause(wb, ...) bdi_dirty_limits(bdi, ...) -> wb_dirty_limits(wb, ...) * Init/exits of the relocated fields are moved to bdi_wb_init/exit() respectively. Note that explicit zeroing is dropped in the process as wb's are cleared in entirety anyway. * As there's still only one bdi_writeback per backing_dev_info, all uses of bdi->stat[] are mechanically replaced with bdi->wb.stat[] introducing no behavior changes. v2: Typo in description fixed as suggested by Jan. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Wu Fengguang Cc: Jaegeuk Kim Cc: Steven Whitehouse Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 20 ++++++++++---------- include/linux/writeback.h | 19 +++++++++---------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fe7a907a4e16..2ab06049d812 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -60,16 +60,6 @@ struct bdi_writeback { spinlock_t list_lock; /* protects the b_* lists */ struct percpu_counter stat[NR_WB_STAT_ITEMS]; -}; - -struct backing_dev_info { - struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ - unsigned int capabilities; /* Device capabilities */ - congested_fn *congested_fn; /* Function pointer if device is md/dm */ - void *congested_data; /* Pointer to aux data for congested func */ - - char *name; unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; @@ -88,6 +78,16 @@ struct backing_dev_info { struct fprop_local_percpu completions; int dirty_exceeded; +}; + +struct backing_dev_info { + struct list_head bdi_list; + unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned int capabilities; /* Device capabilities */ + congested_fn *congested_fn; /* Function pointer if device is md/dm */ + void *congested_data; /* Pointer to aux data for congested func */ + + char *name; unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b2dd371ec0ca..a6b9db7fcee8 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -155,16 +155,15 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); -unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, - unsigned long dirty); - -void __bdi_update_bandwidth(struct backing_dev_info *bdi, - unsigned long thresh, - unsigned long bg_thresh, - unsigned long dirty, - unsigned long bdi_thresh, - unsigned long bdi_dirty, - unsigned long start_time); +unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty); + +void __wb_update_bandwidth(struct bdi_writeback *wb, + unsigned long thresh, + unsigned long bg_thresh, + unsigned long dirty, + unsigned long bdi_thresh, + unsigned long bdi_dirty, + unsigned long start_time); void page_writeback_init(void); void balance_dirty_pages_ratelimited(struct address_space *mapping); -- cgit v1.2.3 From f0054bb1e1f3be03cc33369df640db97f10f6172 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:30 -0400 Subject: writeback: move backing_dev_info->wb_lock and ->worklist into bdi_writeback Currently, a bdi (backing_dev_info) embeds single wb (bdi_writeback) and the role of the separation is unclear. For cgroup support for writeback IOs, a bdi will be updated to host multiple wb's where each wb serves writeback IOs of a different cgroup on the bdi. To achieve that, a wb should carry all states necessary for servicing writeback IOs for a cgroup independently. This patch moves bdi->wb_lock and ->worklist into wb. * The lock protects bdi->worklist and bdi->wb.dwork scheduling. While moving, rename it to wb->work_lock as wb->wb_lock is confusing. Also, move wb->dwork downwards so that it's colocated with the new ->work_lock and ->work_list fields. * bdi_writeback_workfn() -> wb_workfn() bdi_wakeup_thread_delayed(bdi) -> wb_wakeup_delayed(wb) bdi_wakeup_thread(bdi) -> wb_wakeup(wb) bdi_queue_work(bdi, ...) -> wb_queue_work(wb, ...) __bdi_start_writeback(bdi, ...) -> __wb_start_writeback(wb, ...) get_next_work_item(bdi) -> get_next_work_item(wb) * bdi_wb_shutdown() is renamed to wb_shutdown() and now takes @wb. The function contained parts which belong to the containing bdi rather than the wb itself - testing cap_writeback_dirty and bdi_remove_from_list() invocation. Those are moved to bdi_unregister(). * bdi_wb_{init|exit}() are renamed to wb_{init|exit}(). Initializations of the moved bdi->wb_lock and ->work_list are relocated from bdi_init() to wb_init(). * As there's still only one bdi_writeback per backing_dev_info, all uses of bdi->state are mechanically replaced with bdi->wb.state introducing no behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Wu Fengguang Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2ab06049d812..d796f49ce87a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -52,7 +52,6 @@ struct bdi_writeback { unsigned long state; /* Always use atomic bitops on this */ unsigned long last_old_flush; /* last old data flush */ - struct delayed_work dwork; /* work item used for writeback */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -78,6 +77,10 @@ struct bdi_writeback { struct fprop_local_percpu completions; int dirty_exceeded; + + spinlock_t work_lock; /* protects work_list & dwork scheduling */ + struct list_head work_list; + struct delayed_work dwork; /* work item used for writeback */ }; struct backing_dev_info { @@ -93,9 +96,6 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct bdi_writeback wb; /* default writeback info for this bdi */ - spinlock_t wb_lock; /* protects work_list & wb.dwork scheduling */ - - struct list_head work_list; struct device *dev; @@ -121,9 +121,9 @@ int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); -void bdi_writeback_workfn(struct work_struct *work); +void wb_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); -void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); +void wb_wakeup_delayed(struct bdi_writeback *wb); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -- cgit v1.2.3 From 66114cad64bf76a155fec1f0fff0de771cf909d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:32 -0400 Subject: writeback: separate out include/linux/backing-dev-defs.h With the planned cgroup writeback support, backing-dev related declarations will be more widely used across block and cgroup; unfortunately, including backing-dev.h from include/linux/blkdev.h makes cyclic include dependency quite likely. This patch separates out backing-dev-defs.h which only has the essential definitions and updates blkdev.h to include it. c files which need access to more backing-dev details now include backing-dev.h directly. This takes backing-dev.h off the common include dependency chain making it a lot easier to use it across block and cgroup. v2: fs/fat build failure fixed. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 106 +++++++++++++++++++++++++++++++++++++++ include/linux/backing-dev.h | 102 +------------------------------------ include/linux/blkdev.h | 2 +- 3 files changed, 108 insertions(+), 102 deletions(-) create mode 100644 include/linux/backing-dev-defs.h (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h new file mode 100644 index 000000000000..aa18c4bd43c1 --- /dev/null +++ b/include/linux/backing-dev-defs.h @@ -0,0 +1,106 @@ +#ifndef __LINUX_BACKING_DEV_DEFS_H +#define __LINUX_BACKING_DEV_DEFS_H + +#include +#include +#include +#include +#include +#include + +struct page; +struct device; +struct dentry; + +/* + * Bits in bdi_writeback.state + */ +enum wb_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ + WB_registered, /* bdi_register() was done */ + WB_writeback_running, /* Writeback is in progress */ +}; + +typedef int (congested_fn)(void *, int); + +enum wb_stat_item { + WB_RECLAIMABLE, + WB_WRITEBACK, + WB_DIRTIED, + WB_WRITTEN, + NR_WB_STAT_ITEMS +}; + +#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) + +struct bdi_writeback { + struct backing_dev_info *bdi; /* our parent bdi */ + + unsigned long state; /* Always use atomic bitops on this */ + unsigned long last_old_flush; /* last old data flush */ + + struct list_head b_dirty; /* dirty inodes */ + struct list_head b_io; /* parked for writeback */ + struct list_head b_more_io; /* parked for more writeback */ + struct list_head b_dirty_time; /* time stamps are dirty */ + spinlock_t list_lock; /* protects the b_* lists */ + + struct percpu_counter stat[NR_WB_STAT_ITEMS]; + + unsigned long bw_time_stamp; /* last time write bw is updated */ + unsigned long dirtied_stamp; + unsigned long written_stamp; /* pages written at bw_time_stamp */ + unsigned long write_bandwidth; /* the estimated write bandwidth */ + unsigned long avg_write_bandwidth; /* further smoothed write bw */ + + /* + * The base dirty throttle rate, re-calculated on every 200ms. + * All the bdi tasks' dirty rate will be curbed under it. + * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit + * in small steps and is much more smooth/stable than the latter. + */ + unsigned long dirty_ratelimit; + unsigned long balanced_dirty_ratelimit; + + struct fprop_local_percpu completions; + int dirty_exceeded; + + spinlock_t work_lock; /* protects work_list & dwork scheduling */ + struct list_head work_list; + struct delayed_work dwork; /* work item used for writeback */ +}; + +struct backing_dev_info { + struct list_head bdi_list; + unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned int capabilities; /* Device capabilities */ + congested_fn *congested_fn; /* Function pointer if device is md/dm */ + void *congested_data; /* Pointer to aux data for congested func */ + + char *name; + + unsigned int min_ratio; + unsigned int max_ratio, max_prop_frac; + + struct bdi_writeback wb; /* default writeback info for this bdi */ + + struct device *dev; + + struct timer_list laptop_mode_wb_timer; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debug_dir; + struct dentry *debug_stats; +#endif +}; + +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + +void clear_bdi_congested(struct backing_dev_info *bdi, int sync); +void set_bdi_congested(struct backing_dev_info *bdi, int sync); + +#endif /* __LINUX_BACKING_DEV_DEFS_H */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d796f49ce87a..5e39f7a8efed 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -8,104 +8,11 @@ #ifndef _LINUX_BACKING_DEV_H #define _LINUX_BACKING_DEV_H -#include -#include -#include #include #include #include -#include #include -#include -#include -#include - -struct page; -struct device; -struct dentry; - -/* - * Bits in bdi_writeback.state - */ -enum wb_state { - WB_async_congested, /* The async (write) queue is getting full */ - WB_sync_congested, /* The sync queue is getting full */ - WB_registered, /* bdi_register() was done */ - WB_writeback_running, /* Writeback is in progress */ -}; - -typedef int (congested_fn)(void *, int); - -enum wb_stat_item { - WB_RECLAIMABLE, - WB_WRITEBACK, - WB_DIRTIED, - WB_WRITTEN, - NR_WB_STAT_ITEMS -}; - -#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) - -struct bdi_writeback { - struct backing_dev_info *bdi; /* our parent bdi */ - - unsigned long state; /* Always use atomic bitops on this */ - unsigned long last_old_flush; /* last old data flush */ - - struct list_head b_dirty; /* dirty inodes */ - struct list_head b_io; /* parked for writeback */ - struct list_head b_more_io; /* parked for more writeback */ - struct list_head b_dirty_time; /* time stamps are dirty */ - spinlock_t list_lock; /* protects the b_* lists */ - - struct percpu_counter stat[NR_WB_STAT_ITEMS]; - - unsigned long bw_time_stamp; /* last time write bw is updated */ - unsigned long dirtied_stamp; - unsigned long written_stamp; /* pages written at bw_time_stamp */ - unsigned long write_bandwidth; /* the estimated write bandwidth */ - unsigned long avg_write_bandwidth; /* further smoothed write bw */ - - /* - * The base dirty throttle rate, re-calculated on every 200ms. - * All the bdi tasks' dirty rate will be curbed under it. - * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit - * in small steps and is much more smooth/stable than the latter. - */ - unsigned long dirty_ratelimit; - unsigned long balanced_dirty_ratelimit; - - struct fprop_local_percpu completions; - int dirty_exceeded; - - spinlock_t work_lock; /* protects work_list & dwork scheduling */ - struct list_head work_list; - struct delayed_work dwork; /* work item used for writeback */ -}; - -struct backing_dev_info { - struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ - unsigned int capabilities; /* Device capabilities */ - congested_fn *congested_fn; /* Function pointer if device is md/dm */ - void *congested_data; /* Pointer to aux data for congested func */ - - char *name; - - unsigned int min_ratio; - unsigned int max_ratio, max_prop_frac; - - struct bdi_writeback wb; /* default writeback info for this bdi */ - - struct device *dev; - - struct timer_list laptop_mode_wb_timer; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debug_dir; - struct dentry *debug_stats; -#endif -}; +#include struct backing_dev_info *inode_to_bdi(struct inode *inode); @@ -265,13 +172,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; - -void clear_bdi_congested(struct backing_dev_info *bdi, int sync); -void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); long wait_iff_congested(struct zone *zone, int sync, long timeout); int pdflush_proc_obsolete(struct ctl_table *table, int write, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ccaa9aecd593..60d2726a6b62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From a212b105b07d75b48b1a166378282e8a77fbf53d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:33 -0400 Subject: bdi: make inode_to_bdi() inline Now that bdi definitions are moved to backing-dev-defs.h, backing-dev.h can include blkdev.h and inline inode_to_bdi() without worrying about introducing circular include dependency. The function gets called from hot paths and fairly trivial. This patch makes inode_to_bdi() and sb_is_blkdev_sb() that the function calls inline. blockdev_superblock and noop_backing_dev_info are EXPORT_GPL'd to allow the inline functions to be used from modules. While at it, make sb_is_blkdev_sb() return bool instead of int. v2: Fixed typo in description as suggested by Jan. Signed-off-by: Tejun Heo Reviewed-by: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 18 ++++++++++++++++-- include/linux/fs.h | 8 +++++++- 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5e39f7a8efed..785782034e86 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -11,11 +11,10 @@ #include #include #include +#include #include #include -struct backing_dev_info *inode_to_bdi(struct inode *inode); - int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -149,6 +148,21 @@ extern struct backing_dev_info noop_backing_dev_info; int writeback_in_progress(struct backing_dev_info *bdi); +static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) +{ + struct super_block *sb; + + if (!inode) + return &noop_backing_dev_info; + + sb = inode->i_sb; +#ifdef CONFIG_BLOCK + if (sb_is_blkdev_sb(sb)) + return blk_get_backing_dev_info(I_BDEV(inode)); +#endif + return sb->s_bdi; +} + static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) { if (bdi->congested_fn) diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ef63900243c..ce100b87fba3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2240,7 +2240,13 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int sb_is_blkdev_sb(struct super_block *sb); + +extern struct super_block *blockdev_superblock; + +static inline bool sb_is_blkdev_sb(struct super_block *sb) +{ + return sb == blockdev_superblock; +} #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } -- cgit v1.2.3 From 4aa9c692e052cf6db99db62a8fe0543e5c455da7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:35 -0400 Subject: bdi: separate out congested state into a separate struct Currently, a wb's (bdi_writeback) congestion state is carried in its ->state field; however, cgroup writeback support will require multiple wb's sharing the same congestion state. This patch separates out congestion state into its own struct - struct bdi_writeback_congested. A new field wb field, wb_congested, points to its associated congested struct. The default wb, bdi->wb, always points to bdi->wb_congested. While this patch adds a layer of indirection, it doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 14 ++++++++++++-- include/linux/backing-dev.h | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index aa18c4bd43c1..9e9eafa5f5aa 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -16,12 +16,15 @@ struct dentry; * Bits in bdi_writeback.state */ enum wb_state { - WB_async_congested, /* The async (write) queue is getting full */ - WB_sync_congested, /* The sync queue is getting full */ WB_registered, /* bdi_register() was done */ WB_writeback_running, /* Writeback is in progress */ }; +enum wb_congested_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ +}; + typedef int (congested_fn)(void *, int); enum wb_stat_item { @@ -34,6 +37,10 @@ enum wb_stat_item { #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) +struct bdi_writeback_congested { + unsigned long state; /* WB_[a]sync_congested flags */ +}; + struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ @@ -48,6 +55,8 @@ struct bdi_writeback { struct percpu_counter stat[NR_WB_STAT_ITEMS]; + struct bdi_writeback_congested *congested; + unsigned long bw_time_stamp; /* last time write bw is updated */ unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ @@ -84,6 +93,7 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct bdi_writeback wb; /* default writeback info for this bdi */ + struct bdi_writeback_congested wb_congested; struct device *dev; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 785782034e86..bfdaa18ba0a1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -167,7 +167,7 @@ static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) { if (bdi->congested_fn) return bdi->congested_fn(bdi->congested_data, bdi_bits); - return (bdi->wb.state & bdi_bits); + return (bdi->wb.congested->state & bdi_bits); } static inline int bdi_read_congested(struct backing_dev_info *bdi) -- cgit v1.2.3 From 89e9b9e07a390c50980d10aa37a04631db5a23ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:36 -0400 Subject: writeback: add {CONFIG|BDI_CAP|FS}_CGROUP_WRITEBACK cgroup writeback requires support from both bdi and filesystem sides. Add BDI_CAP_CGROUP_WRITEBACK and FS_CGROUP_WRITEBACK to indicate support and enable BDI_CAP_CGROUP_WRITEBACK on block based bdi's by default. Also, define CONFIG_CGROUP_WRITEBACK which is enabled if both MEMCG and BLK_CGROUP are enabled. inode_cgwb_enabled() which determines whether a given inode's both bdi and fs support cgroup writeback is added. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 32 +++++++++++++++++++++++++++++++- include/linux/fs.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index bfdaa18ba0a1..6bb31234e6a9 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -134,12 +134,15 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_WRITEBACK: Don't write pages back * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. + * + * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 +#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -229,4 +232,31 @@ static inline int bdi_sched_wait(void *word) return 0; } -#endif /* _LINUX_BACKING_DEV_H */ +#ifdef CONFIG_CGROUP_WRITEBACK + +/** + * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode + * @inode: inode of interest + * + * cgroup writeback requires support from both the bdi and filesystem. + * Test whether @inode has both. + */ +static inline bool inode_cgwb_enabled(struct inode *inode) +{ + struct backing_dev_info *bdi = inode_to_bdi(inode); + + return bdi_cap_account_dirty(bdi) && + (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && + (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK); +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline bool inode_cgwb_enabled(struct inode *inode) +{ + return false; +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + +#endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index ce100b87fba3..74e0ae0626a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_CGROUP_WRITEBACK 32 /* Supports cgroup-aware writeback */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); -- cgit v1.2.3 From 52ebea749aaed195245701a8f90a23d672c7a933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:37 -0400 Subject: writeback: make backing_dev_info host cgroup-specific bdi_writebacks For the planned cgroup writeback support, on each bdi (backing_dev_info), each memcg will be served by a separate wb (bdi_writeback). This patch updates bdi so that a bdi can host multiple wbs (bdi_writebacks). On the default hierarchy, blkcg implicitly enables memcg. This allows using memcg's page ownership for attributing writeback IOs, and every memcg - blkcg combination can be served by its own wb by assigning a dedicated wb to each memcg. This means that there may be multiple wb's of a bdi mapped to the same blkcg. As congested state is per blkcg - bdi combination, those wb's should share the same congested state. This is achieved by tracking congested state via bdi_writeback_congested structs which are keyed by blkcg. bdi->wb remains unchanged and will keep serving the root cgroup. cgwb's (cgroup wb's) for non-root cgroups are created on-demand or looked up while dirtying an inode according to the memcg of the page being dirtied or current task. Each cgwb is indexed on bdi->cgwb_tree by its memcg id. Once an inode is associated with its wb, it can be retrieved using inode_to_wb(). Currently, none of the filesystems has FS_CGROUP_WRITEBACK and all pages will keep being associated with bdi->wb. v3: inode_attach_wb() in account_page_dirtied() moved inside mapping_cap_account_dirty() block where it's known to be !NULL. Also, an unnecessary NULL check before kfree() removed. Both detected by the kbuild bot. v2: Updated so that wb association is per inode and wb is per memcg rather than blkcg. Signed-off-by: Tejun Heo Cc: kbuild test robot Cc: Dan Carpenter Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 59 +++++++++++- include/linux/backing-dev.h | 195 +++++++++++++++++++++++++++++++++++++++ include/linux/blk-cgroup.h | 4 + include/linux/fs.h | 4 + include/linux/memcontrol.h | 4 + 5 files changed, 263 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 9e9eafa5f5aa..a1e9c407a59a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -2,8 +2,11 @@ #define __LINUX_BACKING_DEV_DEFS_H #include +#include +#include #include #include +#include #include #include #include @@ -37,10 +40,43 @@ enum wb_stat_item { #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) +/* + * For cgroup writeback, multiple wb's may map to the same blkcg. Those + * wb's can operate mostly independently but should share the congested + * state. To facilitate such sharing, the congested state is tracked using + * the following struct which is created on demand, indexed by blkcg ID on + * its bdi, and refcounted. + */ struct bdi_writeback_congested { unsigned long state; /* WB_[a]sync_congested flags */ + +#ifdef CONFIG_CGROUP_WRITEBACK + struct backing_dev_info *bdi; /* the associated bdi */ + atomic_t refcnt; /* nr of attached wb's and blkg */ + int blkcg_id; /* ID of the associated blkcg */ + struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ +#endif }; +/* + * Each wb (bdi_writeback) can perform writeback operations, is measured + * and throttled, independently. Without cgroup writeback, each bdi + * (bdi_writeback) is served by its embedded bdi->wb. + * + * On the default hierarchy, blkcg implicitly enables memcg. This allows + * using memcg's page ownership for attributing writeback IOs, and every + * memcg - blkcg combination can be served by its own wb by assigning a + * dedicated wb to each memcg, which enables isolation across different + * cgroups and propagation of IO back pressure down from the IO layer upto + * the tasks which are generating the dirty pages to be written back. + * + * A cgroup wb is indexed on its bdi by the ID of the associated memcg, + * refcounted with the number of inodes attached to it, and pins the memcg + * and the corresponding blkcg. As the corresponding blkcg for a memcg may + * change as blkcg is disabled and enabled higher up in the hierarchy, a wb + * is tested for blkcg after lookup and removed from index on mismatch so + * that a new wb for the combination can be created. + */ struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ @@ -78,6 +114,19 @@ struct bdi_writeback { spinlock_t work_lock; /* protects work_list & dwork scheduling */ struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + +#ifdef CONFIG_CGROUP_WRITEBACK + struct percpu_ref refcnt; /* used only for !root wb's */ + struct cgroup_subsys_state *memcg_css; /* the associated memcg */ + struct cgroup_subsys_state *blkcg_css; /* and blkcg */ + struct list_head memcg_node; /* anchored at memcg->cgwb_list */ + struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ + + union { + struct work_struct release_work; + struct rcu_head rcu; + }; +#endif }; struct backing_dev_info { @@ -92,9 +141,13 @@ struct backing_dev_info { unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; - struct bdi_writeback wb; /* default writeback info for this bdi */ - struct bdi_writeback_congested wb_congested; - + struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct bdi_writeback_congested wb_congested; /* its congested state */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ + struct rb_root cgwb_congested_tree; /* their congested states */ + atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ +#endif struct device *dev; struct timer_list laptop_mode_wb_timer; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6bb31234e6a9..8ae59df2e3d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include int __must_check bdi_init(struct backing_dev_info *bdi); @@ -234,6 +235,16 @@ static inline int bdi_sched_wait(void *word) #ifdef CONFIG_CGROUP_WRITEBACK +struct bdi_writeback_congested * +wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp); +void wb_congested_put(struct bdi_writeback_congested *congested); +struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, + struct cgroup_subsys_state *memcg_css, + gfp_t gfp); +void __inode_attach_wb(struct inode *inode, struct page *page); +void wb_memcg_offline(struct mem_cgroup *memcg); +void wb_blkcg_offline(struct blkcg *blkcg); + /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest @@ -250,6 +261,135 @@ static inline bool inode_cgwb_enabled(struct inode *inode) (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK); } +/** + * wb_tryget - try to increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + return percpu_ref_tryget(&wb->refcnt); + return true; +} + +/** + * wb_get - increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline void wb_get(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_get(&wb->refcnt); +} + +/** + * wb_put - decrement a wb's refcount + * @wb: bdi_writeback to put + */ +static inline void wb_put(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_put(&wb->refcnt); +} + +/** + * wb_find_current - find wb for %current on a bdi + * @bdi: bdi of interest + * + * Find the wb of @bdi which matches both the memcg and blkcg of %current. + * Must be called under rcu_read_lock() which protects the returend wb. + * NULL if not found. + */ +static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) +{ + struct cgroup_subsys_state *memcg_css; + struct bdi_writeback *wb; + + memcg_css = task_css(current, memory_cgrp_id); + if (!memcg_css->parent) + return &bdi->wb; + + wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); + + /* + * %current's blkcg equals the effective blkcg of its memcg. No + * need to use the relatively expensive cgroup_get_e_css(). + */ + if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id))) + return wb; + return NULL; +} + +/** + * wb_get_create_current - get or create wb for %current on a bdi + * @bdi: bdi of interest + * @gfp: allocation mask + * + * Equivalent to wb_get_create() on %current's memcg. This function is + * called from a relatively hot path and optimizes the common cases using + * wb_find_current(). + */ +static inline struct bdi_writeback * +wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) +{ + struct bdi_writeback *wb; + + rcu_read_lock(); + wb = wb_find_current(bdi); + if (wb && unlikely(!wb_tryget(wb))) + wb = NULL; + rcu_read_unlock(); + + if (unlikely(!wb)) { + struct cgroup_subsys_state *memcg_css; + + memcg_css = task_get_css(current, memory_cgrp_id); + wb = wb_get_create(bdi, memcg_css, gfp); + css_put(memcg_css); + } + return wb; +} + +/** + * inode_attach_wb - associate an inode with its wb + * @inode: inode of interest + * @page: page being dirtied (may be NULL) + * + * If @inode doesn't have its wb, associate it with the wb matching the + * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o + * @inode->i_lock. + */ +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ + if (!inode->i_wb) + __inode_attach_wb(inode, page); +} + +/** + * inode_detach_wb - disassociate an inode from its wb + * @inode: inode of interest + * + * @inode is being freed. Detach from its wb. + */ +static inline void inode_detach_wb(struct inode *inode) +{ + if (inode->i_wb) { + wb_put(inode->i_wb); + inode->i_wb = NULL; + } +} + +/** + * inode_to_wb - determine the wb of an inode + * @inode: inode of interest + * + * Returns the wb @inode is currently associated with. + */ +static inline struct bdi_writeback *inode_to_wb(struct inode *inode) +{ + return inode->i_wb; +} + #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) @@ -257,6 +397,61 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return false; } +static inline struct bdi_writeback_congested * +wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) +{ + return bdi->wb.congested; +} + +static inline void wb_congested_put(struct bdi_writeback_congested *congested) +{ +} + +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + return true; +} + +static inline void wb_get(struct bdi_writeback *wb) +{ +} + +static inline void wb_put(struct bdi_writeback *wb) +{ +} + +static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) +{ + return &bdi->wb; +} + +static inline struct bdi_writeback * +wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) +{ + return &bdi->wb; +} + +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ +} + +static inline void inode_detach_wb(struct inode *inode) +{ +} + +static inline struct bdi_writeback *inode_to_wb(struct inode *inode) +{ + return &inode_to_bdi(inode)->wb; +} + +static inline void wb_memcg_offline(struct mem_cgroup *memcg) +{ +} + +static inline void wb_blkcg_offline(struct blkcg *blkcg) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 4dc643f2046e..3033eb173eb4 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -53,6 +53,10 @@ struct blkcg { /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ unsigned int cfq_leaf_weight; + +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; +#endif }; struct blkg_stat { diff --git a/include/linux/fs.h b/include/linux/fs.h index 74e0ae0626a8..67a42ec95065 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ #include struct backing_dev_info; +struct bdi_writeback; struct export_operations; struct hd_geometry; struct iovec; @@ -635,6 +636,9 @@ struct inode { struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct bdi_writeback *i_wb; /* the associated cgroup wb */ +#endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; union { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 637ef626008e..662a953ea8ad 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -388,6 +388,10 @@ enum { OVER_LIMIT, }; +#ifdef CONFIG_CGROUP_WRITEBACK +struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); +#endif + struct sock; #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) void sock_update_memcg(struct sock *sk); -- cgit v1.2.3 From ce7acfeaf0363c8b75810908448f61af04d38f91 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:38 -0400 Subject: writeback, blkcg: associate each blkcg_gq with the corresponding bdi_writeback_congested A blkg (blkcg_gq) can be congested and decongested independently from other blkgs on the same request_queue. Accordingly, for cgroup writeback support, the congestion status at bdi (backing_dev_info) should be split and updated separately from matching blkg's. This patch prepares by adding blkg->wb_congested and associating a blkg with its matching per-blkcg bdi_writeback_congested on creation. v2: Updated to associate bdi_writeback_congested instead of bdi_writeback. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3033eb173eb4..07a32b813ed8 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -99,6 +99,12 @@ struct blkcg_gq { struct hlist_node blkcg_node; struct blkcg *blkcg; + /* + * Each blkg gets congested separately and the congestion state is + * propagated to the matching bdi_writeback_congested. + */ + struct bdi_writeback_congested *wb_congested; + /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; -- cgit v1.2.3 From ec8a6f2643923ee5b74d24fa8d134240379f436b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:41 -0400 Subject: writeback: make congestion functions per bdi_writeback Currently, all congestion functions take bdi (backing_dev_info) and always operate on the root wb (bdi->wb) and the congestion state from the block layer is propagated only for the root blkcg. This patch introduces {set|clear}_wb_congested() and wb_congested() which take a bdi_writeback_congested and bdi_writeback respectively. The bdi counteparts are now wrappers invoking the wb based functions on @bdi->wb. While converting clear_bdi_congested() to clear_wb_congested(), the local variable declaration order between @wqh and @bit is swapped for cosmetic reason. This patch just adds the new wb based functions. The following patches will apply them. v2: Updated for bdi_writeback_congested. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 14 +++++++++++-- include/linux/backing-dev.h | 45 +++++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a1e9c407a59a..eb386766b5f3 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -163,7 +163,17 @@ enum { BLK_RW_SYNC = 1, }; -void clear_bdi_congested(struct backing_dev_info *bdi, int sync); -void set_bdi_congested(struct backing_dev_info *bdi, int sync); +void clear_wb_congested(struct bdi_writeback_congested *congested, int sync); +void set_wb_congested(struct bdi_writeback_congested *congested, int sync); + +static inline void clear_bdi_congested(struct backing_dev_info *bdi, int sync) +{ + clear_wb_congested(bdi->wb.congested, sync); +} + +static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) +{ + set_wb_congested(bdi->wb.congested, sync); +} #endif /* __LINUX_BACKING_DEV_DEFS_H */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8ae59df2e3d1..2c498a2a8268 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -167,27 +167,13 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) return sb->s_bdi; } -static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) +static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) { - if (bdi->congested_fn) - return bdi->congested_fn(bdi->congested_data, bdi_bits); - return (bdi->wb.congested->state & bdi_bits); -} - -static inline int bdi_read_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_sync_congested); -} - -static inline int bdi_write_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, 1 << WB_async_congested); -} + struct backing_dev_info *bdi = wb->bdi; -static inline int bdi_rw_congested(struct backing_dev_info *bdi) -{ - return bdi_congested(bdi, (1 << WB_sync_congested) | - (1 << WB_async_congested)); + if (bdi->congested_fn) + return bdi->congested_fn(bdi->congested_data, cong_bits); + return wb->congested->state & cong_bits; } long congestion_wait(int sync, long timeout); @@ -454,4 +440,25 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) #endif /* CONFIG_CGROUP_WRITEBACK */ +static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) +{ + return wb_congested(&bdi->wb, cong_bits); +} + +static inline int bdi_read_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, 1 << WB_sync_congested); +} + +static inline int bdi_write_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, 1 << WB_async_congested); +} + +static inline int bdi_rw_congested(struct backing_dev_info *bdi) +{ + return bdi_congested(bdi, (1 << WB_sync_congested) | + (1 << WB_async_congested)); +} + #endif /* _LINUX_BACKING_DEV_H */ -- cgit v1.2.3 From d40f75a06dd675808eed385d490ba9468200b23f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:42 -0400 Subject: writeback, blkcg: restructure blk_{set|clear}_queue_congested() blk_{set|clear}_queue_congested() take @q and set or clear, respectively, the congestion state of its bdi's root wb. Because bdi used to be able to handle congestion state only on the root wb, the callers of those functions tested whether the congestion is on the root blkcg and skipped if not. This is cumbersome and makes implementation of per cgroup bdi_writeback congestion state propagation difficult. This patch renames blk_{set|clear}_queue_congested() to blk_{set|clear}_congested(), and makes them take request_list instead of request_queue and test whether the specified request_list is the root one before updating bdi_writeback congestion state. This makes the tests in the callers unnecessary and simplifies them. As there are no external users of these functions, the definitions are moved from include/linux/blkdev.h to block/blk-core.c. This patch doesn't introduce any noticeable behavior difference. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60d2726a6b62..ab4a27852f1b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -790,25 +790,6 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern void blk_queue_bio(struct request_queue *q, struct bio *bio); -/* - * A queue has just exitted congestion. Note this in the global counter of - * congested queues, and wake up anyone who was waiting for requests to be - * put back. - */ -static inline void blk_clear_queue_congested(struct request_queue *q, int sync) -{ - clear_bdi_congested(&q->backing_dev_info, sync); -} - -/* - * A queue has just entered congestion. Flag that in the queue's VM-visible - * state flags and increment the global gounter of congested queues. - */ -static inline void blk_set_queue_congested(struct request_queue *q, int sync) -{ - set_bdi_congested(&q->backing_dev_info, sync); -} - extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From 703c270887bb5106c4c46a00cc7477d30d5e04f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:44 -0400 Subject: writeback: implement and use inode_congested() In several places, bdi_congested() and its wrappers are used to determine whether more IOs should be issued. With cgroup writeback support, this question can't be answered solely based on the bdi (backing_dev_info). It's dependent on whether the filesystem and bdi support cgroup writeback and the blkcg the inode is associated with. This patch implements inode_congested() and its wrappers which take @inode and determines the congestion state considering cgroup writeback. The new functions replace bdi_*congested() calls in places where the query is about specific inode and task. There are several filesystem users which also fit this criteria but they should be updated when each filesystem implements cgroup writeback support. v2: Now that a given inode is associated with only one wb, congestion state can be determined independent from the asking task. Drop @task. Spotted by Vivek. Also, converted to take @inode instead of @mapping and renamed to inode_congested(). Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2c498a2a8268..6f0882105f95 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -230,6 +230,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, void __inode_attach_wb(struct inode *inode, struct page *page); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct blkcg *blkcg); +int inode_congested(struct inode *inode, int cong_bits); /** * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode @@ -438,8 +439,29 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } +static inline int inode_congested(struct inode *inode, int cong_bits) +{ + return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); +} + #endif /* CONFIG_CGROUP_WRITEBACK */ +static inline int inode_read_congested(struct inode *inode) +{ + return inode_congested(inode, 1 << WB_sync_congested); +} + +static inline int inode_write_congested(struct inode *inode) +{ + return inode_congested(inode, 1 << WB_async_congested); +} + +static inline int inode_rw_congested(struct inode *inode) +{ + return inode_congested(inode, (1 << WB_sync_congested) | + (1 << WB_async_congested)); +} + static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) { return wb_congested(&bdi->wb, cong_bits); -- cgit v1.2.3 From d6c10f1fc8626dc55946f4768ae322b4c57b07dd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:45 -0400 Subject: writeback: implement WB_has_dirty_io wb_state flag Currently, wb_has_dirty_io() determines whether a wb (bdi_writeback) has any dirty inode by testing all three IO lists on each invocation without actively keeping track. For cgroup writeback support, a single bdi will host multiple wb's each of which will host dirty inodes separately and we'll need to make bdi_has_dirty_io(), which currently only represents the root wb, aggregate has_dirty_io from all member wb's, which requires tracking transitions in has_dirty_io state on each wb. This patch introduces inode_wb_list_{move|del}_locked() to consolidate IO list operations leaving queue_io() the only other function which directly manipulates IO lists (via move_expired_inodes()). All three functions are updated to call wb_io_lists_[de]populated() which keep track of whether the wb has dirty inodes or not and record it using the new WB_has_dirty_io flag. inode_wb_list_moved_locked()'s return value indicates whether the wb had no dirty inodes before. mark_inode_dirty() is restructured so that the return value of inode_wb_list_move_locked() can be used for deciding whether to wake up the wb. While at it, change {bdi|wb}_has_dirty_io()'s return values to bool. These functions were returning 0 and 1 before. Also, add a comment explaining the synchronization of wb_state flags. v2: Updated to accommodate b_dirty_time. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + include/linux/backing-dev.h | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index eb386766b5f3..7a94b7850b7c 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -21,6 +21,7 @@ struct dentry; enum wb_state { WB_registered, /* bdi_register() was done */ WB_writeback_running, /* Writeback is in progress */ + WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ }; enum wb_congested_state { diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6f0882105f95..3c8403c012ce 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -29,7 +29,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); void wb_workfn(struct work_struct *work); -int bdi_has_dirty_io(struct backing_dev_info *bdi); +bool bdi_has_dirty_io(struct backing_dev_info *bdi); void wb_wakeup_delayed(struct bdi_writeback *wb); extern spinlock_t bdi_lock; @@ -37,11 +37,9 @@ extern struct list_head bdi_list; extern struct workqueue_struct *bdi_wq; -static inline int wb_has_dirty_io(struct bdi_writeback *wb) +static inline bool wb_has_dirty_io(struct bdi_writeback *wb) { - return !list_empty(&wb->b_dirty) || - !list_empty(&wb->b_io) || - !list_empty(&wb->b_more_io); + return test_bit(WB_has_dirty_io, &wb->state); } static inline void __add_wb_stat(struct bdi_writeback *wb, -- cgit v1.2.3 From 766a9d6e60578f1ef6de71f89f022084f8bffc82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:46 -0400 Subject: writeback: implement backing_dev_info->tot_write_bandwidth cgroup writeback support needs to keep track of the sum of avg_write_bandwidth of all wb's (bdi_writeback's) with dirty inodes to distribute write workload. This patch adds bdi->tot_write_bandwidth and updates inode_wb_list_move_locked(), inode_wb_list_del_locked() and wb_update_write_bandwidth() to adjust it as wb's gain and lose dirty inodes and its avg_write_bandwidth gets updated. As the update events are not synchronized with each other, bdi->tot_write_bandwidth is an atomic_long_t. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 7a94b7850b7c..d631a61f4023 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -142,6 +142,8 @@ struct backing_dev_info { unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; + atomic_long_t tot_write_bandwidth; /* sum of active avg_write_bw */ + struct bdi_writeback wb; /* the root writeback info for this bdi */ struct bdi_writeback_congested wb_congested; /* its congested state */ #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From 95a46c65e3c09edb9f17dabf2dc16670cd328739 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:47 -0400 Subject: writeback: make bdi_has_dirty_io() take multiple bdi_writeback's into account bdi_has_dirty_io() used to only reflect whether the root wb (bdi_writeback) has dirty inodes. For cgroup writeback support, it needs to take all active wb's into account. If any wb on the bdi has dirty inodes, bdi_has_dirty_io() should return true. To achieve that, as inode_wb_list_{move|del}_locked() now keep track of the dirty state transition of each wb, the number of dirty wbs can be counted in the bdi; however, bdi is already aggregating wb->avg_write_bandwidth which can easily be guaranteed to be > 0 when there are any dirty inodes by ensuring wb->avg_write_bandwidth can't dip below 1. bdi_has_dirty_io() can simply test whether bdi->tot_write_bandwidth is zero or not. While this bumps the value of wb->avg_write_bandwidth to one when it used to be zero, this shouldn't cause any meaningful behavior difference. bdi_has_dirty_io() is made an inline function which tests whether ->tot_write_bandwidth is non-zero. Also, WARN_ON_ONCE()'s on its value are added to inode_wb_list_{move|del}_locked(). Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 8 ++++++-- include/linux/backing-dev.h | 10 +++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index d631a61f4023..8c857d723023 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -98,7 +98,7 @@ struct bdi_writeback { unsigned long dirtied_stamp; unsigned long written_stamp; /* pages written at bw_time_stamp */ unsigned long write_bandwidth; /* the estimated write bandwidth */ - unsigned long avg_write_bandwidth; /* further smoothed write bw */ + unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */ /* * The base dirty throttle rate, re-calculated on every 200ms. @@ -142,7 +142,11 @@ struct backing_dev_info { unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; - atomic_long_t tot_write_bandwidth; /* sum of active avg_write_bw */ + /* + * Sum of avg_write_bw of wbs with dirty inodes. > 0 if there are + * any dirty wbs, which is depended upon by bdi_has_dirty(). + */ + atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct bdi_writeback_congested wb_congested; /* its congested state */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3c8403c012ce..0839e44105bd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -29,7 +29,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); void wb_workfn(struct work_struct *work); -bool bdi_has_dirty_io(struct backing_dev_info *bdi); void wb_wakeup_delayed(struct bdi_writeback *wb); extern spinlock_t bdi_lock; @@ -42,6 +41,15 @@ static inline bool wb_has_dirty_io(struct bdi_writeback *wb) return test_bit(WB_has_dirty_io, &wb->state); } +static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) +{ + /* + * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are + * any dirty wbs. See wb_update_write_bandwidth(). + */ + return atomic_long_read(&bdi->tot_write_bandwidth); +} + static inline void __add_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { -- cgit v1.2.3 From ebe41ab0c79d5633123f6faa3265a1a63c5f22d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:50 -0400 Subject: writeback: implement bdi_for_each_wb() This will be used to implement bdi-wide operations which should be distributed across all its cgroup bdi_writebacks. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 63 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0839e44105bd..c7979806baee 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -383,6 +383,61 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return inode->i_wb; } +struct wb_iter { + int start_blkcg_id; + struct radix_tree_iter tree_iter; + void **slot; +}; + +static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, + struct backing_dev_info *bdi) +{ + struct radix_tree_iter *titer = &iter->tree_iter; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (iter->start_blkcg_id >= 0) { + iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); + iter->start_blkcg_id = -1; + } else { + iter->slot = radix_tree_next_slot(iter->slot, titer, 0); + } + + if (!iter->slot) + iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0); + if (iter->slot) + return *iter->slot; + return NULL; +} + +static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, + struct backing_dev_info *bdi, + int start_blkcg_id) +{ + iter->start_blkcg_id = start_blkcg_id; + + if (start_blkcg_id) + return __wb_iter_next(iter, bdi); + else + return &bdi->wb; +} + +/** + * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order + * @wb_cur: cursor struct bdi_writeback pointer + * @bdi: bdi to walk wb's of + * @iter: pointer to struct wb_iter to be used as iteration buffer + * @start_blkcg_id: blkcg ID to start iteration from + * + * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending + * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter + * to be used as temp storage during iteration. rcu_read_lock() must be + * held throughout iteration. + */ +#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ + for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ + (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) + #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) @@ -445,6 +500,14 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } +struct wb_iter { + int next_id; +}; + +#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ + for ((iter)->next_id = (start_blkcg_id); \ + ({ (wb_cur) = !(iter)->next_id++ ? &(bdi)->wb : NULL; }); ) + static inline int inode_congested(struct inode *inode, int cong_bits) { return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); -- cgit v1.2.3 From c00ddad39f512b1a81e25b7892217ce10efab0f1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:51 -0400 Subject: writeback: remove bdi_start_writeback() bdi_start_writeback() is a thin wrapper on top of __wb_start_writeback() which is used only by laptop_mode_timer_fn(). This patches removes bdi_start_writeback(), renames __wb_start_writeback() to wb_start_writeback() and makes laptop_mode_timer_fn() use it instead. This doesn't cause any functional difference and will ease making laptop_mode_timer_fn() cgroup writeback aware. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c7979806baee..0ff40c228bee 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -25,8 +25,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); -void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, - enum wb_reason reason); +void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, + bool range_cyclic, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); void wb_workfn(struct work_struct *work); void wb_wakeup_delayed(struct bdi_writeback *wb); -- cgit v1.2.3 From bc05873dccd27d75d6acdf812c3edfb181f1ba17 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:53 -0400 Subject: writeback: make writeback_in_progress() take bdi_writeback instead of backing_dev_info writeback_in_progress() currently takes @bdi and returns whether writeback is in progress on its root wb (bdi_writeback). In preparation for cgroup writeback support, make it take wb instead. While at it, make it an inline function. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0ff40c228bee..f04956c900ec 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -156,7 +156,17 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); extern struct backing_dev_info noop_backing_dev_info; -int writeback_in_progress(struct backing_dev_info *bdi); +/** + * writeback_in_progress - determine whether there is writeback in progress + * @wb: bdi_writeback of interest + * + * Determine whether there is writeback waiting to be handled against a + * bdi_writeback. + */ +static inline bool writeback_in_progress(struct bdi_writeback *wb) +{ + return test_bit(WB_writeback_running, &wb->state); +} static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { -- cgit v1.2.3 From 9ecf4866c018aeb304a7b49216c4d183665becb7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:54 -0400 Subject: writeback: make bdi_start_background_writeback() take bdi_writeback instead of backing_dev_info bdi_start_background_writeback() currently takes @bdi and kicks the root wb (bdi_writeback). In preparation for cgroup writeback support, make it take wb instead. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f04956c900ec..9cc11e5b97ca 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -27,7 +27,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); -void bdi_start_background_writeback(struct backing_dev_info *bdi); +void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wakeup_delayed(struct bdi_writeback *wb); -- cgit v1.2.3 From cc395d7f1f7b9c740ab6d367ef1f6eb248595dff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:13:58 -0400 Subject: writeback: implement bdi_wait_for_completion() If the completion of a wb_writeback_work can be waited upon by setting its ->done to a struct completion and waiting on it; however, for cgroup writeback support, it's necessary to issue multiple work items to multiple bdi_writebacks and wait for the completion of all. This patch implements wb_completion which can wait for multiple work items and replaces the struct completion with it. It can be defined using DEFINE_WB_COMPLETION_ONSTACK(), used for multiple work items and waited for by wb_wait_for_completion(). Nobody currently issues multiple work items and this patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 8c857d723023..97a92fa0cdb5 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -155,6 +155,8 @@ struct backing_dev_info { struct rb_root cgwb_congested_tree; /* their congested states */ atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ #endif + wait_queue_head_t wb_waitq; + struct device *dev; struct timer_list laptop_mode_wb_timer; -- cgit v1.2.3 From f30a7d0cc8d9096d6728fadd0ab024e648010ec0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 17:14:00 -0400 Subject: writeback: restructure try_writeback_inodes_sb[_nr]() try_writeback_inodes_sb_nr() wraps writeback_inodes_sb_nr() so that it handles s_umount locking and skips if writeback is already in progress. The in progress test is performed on the root wb (bdi_writeback) which isn't sufficient for cgroup writeback support. The test must be done per-wb. To prepare for the change, this patch factors out __writeback_inodes_sb_nr() from writeback_inodes_sb_nr() and adds @skip_if_busy and moves the in progress test right before queueing the wb_writeback_work. try_writeback_inodes_sb_nr() now just grabs s_umount and invokes __writeback_inodes_sb_nr() with asserted @skip_if_busy. This way, later addition of multiple wb handling can skip only the wb's which already have writeback in progress. This swaps the order between in progress test and s_umount test which can flip the return value when writeback is in progress and s_umount is being held by someone else but this shouldn't cause any meaningful difference. It's a fringe condition and the return value is an unsynchronized hint anyway. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/writeback.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a6b9db7fcee8..23af355d5471 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -93,9 +93,9 @@ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); -int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, - enum wb_reason reason); +bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); -- cgit v1.2.3 From bafc0dba1e20d84578d7098d32caf63441e5743d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Jun 2015 08:37:23 -0600 Subject: buffer, writeback: make __block_write_full_page() honor cgroup writeback [__]block_write_full_page() is used to implement ->writepage in various filesystems. All writeback logic is now updated to handle cgroup writeback and the block cgroup to issue IOs for is encoded in writeback_control and can be retrieved from the inode; however, [__]block_write_full_page() currently ignores the blkcg indicated by inode and issues all bio's without explicit blkcg association. This patch adds submit_bh_blkcg() which associates the bio with the specified blkio cgroup before issuing and uses it in __block_write_full_page() so that the issued bio's are associated with inode_to_wb_blkcg_css(inode). v2: Updated for per-inode wb association. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9cc11e5b97ca..e9d7373f5f93 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -393,6 +393,12 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return inode->i_wb; } +static inline struct cgroup_subsys_state * +inode_to_wb_blkcg_css(struct inode *inode) +{ + return inode_to_wb(inode)->blkcg_css; +} + struct wb_iter { int start_blkcg_id; struct radix_tree_iter tree_iter; @@ -510,6 +516,12 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } +static inline struct cgroup_subsys_state * +inode_to_wb_blkcg_css(struct inode *inode) +{ + return blkcg_root_css; +} + struct wb_iter { int next_id; }; -- cgit v1.2.3 From 0d960a383ae7aa791b2833e122ba7519d264cf92 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:19 -0400 Subject: writeback: clean up wb_dirty_limit() The function name wb_dirty_limit(), its argument @dirty and the local variable @wb_dirty are mortally confusing given that the function calculates per-wb threshold value not dirty pages, especially given that @dirty and @wb_dirty are used elsewhere for dirty pages. Let's rename the function to wb_calc_thresh() and wb_dirty to wb_thresh. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 23af355d5471..0435c85d4cfa 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -155,7 +155,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); -unsigned long wb_dirty_limit(struct bdi_writeback *wb, unsigned long dirty); +unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void __wb_update_bandwidth(struct bdi_writeback *wb, unsigned long thresh, -- cgit v1.2.3 From 8a73179956e649df0d4b3250db17734f272d8266 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:20 -0400 Subject: writeback: reorganize [__]wb_update_bandwidth() __wb_update_bandwidth() is called from two places - fs/fs-writeback.c::balance_dirty_pages() and mm/page-writeback.c::wb_writeback(). The latter updates only the write bandwidth while the former also deals with the dirty ratelimit. The two callsites are distinguished by whether @thresh parameter is zero or not, which is cryptic. In addition, the two files define their own different versions of wb_update_bandwidth() on top of __wb_update_bandwidth(), which is confusing to say the least. This patch cleans up [__]wb_update_bandwidth() in the following ways. * __wb_update_bandwidth() now takes explicit @update_ratelimit parameter to gate dirty ratelimit handling. * mm/page-writeback.c::wb_update_bandwidth() is flattened into its caller - balance_dirty_pages(). * fs/fs-writeback.c::wb_update_bandwidth() is moved to mm/page-writeback.c and __wb_update_bandwidth() is made static. * While at it, add a lockdep assertion to __wb_update_bandwidth(). Except for the lockdep addition, this is pure reorganization and doesn't introduce any behavioral changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0435c85d4cfa..80adf3d88d9d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -157,14 +157,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); -void __wb_update_bandwidth(struct bdi_writeback *wb, - unsigned long thresh, - unsigned long bg_thresh, - unsigned long dirty, - unsigned long bdi_thresh, - unsigned long bdi_dirty, - unsigned long start_time); - +void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); void page_writeback_init(void); void balance_dirty_pages_ratelimited(struct address_space *mapping); -- cgit v1.2.3 From 380c27ca33ebecc9da35aa90c8b3a9154f90aac2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:21 -0400 Subject: writeback: implement wb_domain Dirtyable memory is distributed to a wb (bdi_writeback) according to the relative bandwidth the wb is writing out in the whole system. This distribution is global - each wb is measured against all other wb's and gets the proportinately sized portion of the memory in the whole system. For cgroup writeback, the amount of dirtyable memory is scoped by memcg and thus each wb would need to be measured and controlled in its memcg. IOW, a wb will belong to two writeback domains - the global and memcg domains. Currently, what constitutes the global writeback domain are scattered across a number of global states. This patch starts collecting them into struct wb_domain. * fprop_global which serves as the basis for proportional bandwidth measurement and its period timer are moved into struct wb_domain. * global_wb_domain hosts the states for the global domain. * While at it, flatten wb_writeout_fraction() into its callers. This thin wrapper doesn't provide any actual benefits while getting in the way. This is pure reorganization and doesn't introduce any behavioral changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 80adf3d88d9d..3148db1296a2 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -7,6 +7,7 @@ #include #include #include +#include DECLARE_PER_CPU(int, dirty_throttle_leaks); @@ -86,6 +87,36 @@ struct writeback_control { unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ }; +/* + * A wb_domain represents a domain that wb's (bdi_writeback's) belong to + * and are measured against each other in. There always is one global + * domain, global_wb_domain, that every wb in the system is a member of. + * This allows measuring the relative bandwidth of each wb to distribute + * dirtyable memory accordingly. + */ +struct wb_domain { + /* + * Scale the writeback cache size proportional to the relative + * writeout speed. + * + * We do this by keeping a floating proportion between BDIs, based + * on page writeback completions [end_page_writeback()]. Those + * devices that write out pages fastest will get the larger share, + * while the slower will get a smaller share. + * + * We use page writeout completions because we are interested in + * getting rid of dirty pages. Having them written out is the + * primary goal. + * + * We introduce a concept of time, a period over which we measure + * these events, because demand can/will vary over time. The length + * of this period itself is measured in page writeback completions. + */ + struct fprop_global completions; + struct timer_list period_timer; /* timer for aging of completions */ + unsigned long period_time; +}; + /* * fs/fs-writeback.c */ @@ -120,6 +151,7 @@ static inline void laptop_sync_completion(void) { } #endif void throttle_vm_writeout(gfp_t gfp_mask); bool zone_dirty_ok(struct zone *zone); +int wb_domain_init(struct wb_domain *dom, gfp_t gfp); extern unsigned long global_dirty_limit; -- cgit v1.2.3 From dcc25ae76eb7b8ff883eaaab57e30e8f2f085be3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:22 -0400 Subject: writeback: move global_dirty_limit into wb_domain This patch is a part of the series to define wb_domain which represents a domain that wb's (bdi_writeback's) belong to and are measured against each other in. This will enable IO backpressure propagation for cgroup writeback. global_dirty_limit exists to regulate the global dirty threshold which is a property of the wb_domain. This patch moves hard_dirty_limit, dirty_lock, and update_time into wb_domain. This is pure reorganization and doesn't introduce any behavioral changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3148db1296a2..5fdd4e1805e6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -95,6 +95,8 @@ struct writeback_control { * dirtyable memory accordingly. */ struct wb_domain { + spinlock_t lock; + /* * Scale the writeback cache size proportional to the relative * writeout speed. @@ -115,6 +117,19 @@ struct wb_domain { struct fprop_global completions; struct timer_list period_timer; /* timer for aging of completions */ unsigned long period_time; + + /* + * The dirtyable memory and dirty threshold could be suddenly + * knocked down by a large amount (eg. on the startup of KVM in a + * swapless system). This may throw the system into deep dirty + * exceeded state and throttle heavy/light dirtiers alike. To + * retain good responsiveness, maintain global_dirty_limit for + * tracking slowly down to the knocked down dirty threshold. + * + * Both fields are protected by ->lock. + */ + unsigned long dirty_limit_tstamp; + unsigned long dirty_limit; }; /* @@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask); bool zone_dirty_ok(struct zone *zone); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); -extern unsigned long global_dirty_limit; +extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ extern int dirty_background_ratio; -- cgit v1.2.3 From aa661bbe1e61ce80ca4ae98804f673ede94b0827 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:31 -0400 Subject: writeback: move over_bground_thresh() to mm/page-writeback.c and rename it to wb_over_bg_thresh(). The function is closely tied to the dirty throttling mechanism implemented in page-writeback.c. This relocation will allow future updates necessary for cgroup writeback support. While at it, add function comment. This is pure reorganization and doesn't introduce any behavioral changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5fdd4e1805e6..b57c2786b5aa 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -207,6 +207,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); void page_writeback_init(void); void balance_dirty_pages_ratelimited(struct address_space *mapping); +bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); -- cgit v1.2.3 From 841710aa6e4acd066ab9fe8c8cb6f4e4e6709d83 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:33 -0400 Subject: writeback: implement memcg wb_domain Dirtyable memory is distributed to a wb (bdi_writeback) according to the relative bandwidth the wb is writing out in the whole system. This distribution is global - each wb is measured against all other wb's and gets the proportinately sized portion of the memory in the whole system. For cgroup writeback, the amount of dirtyable memory is scoped by memcg and thus each wb would need to be measured and controlled in its memcg. IOW, a wb will belong to two writeback domains - the global and memcg domains. The previous patches laid the groundwork to support the two wb_domains and this patch implements memcg wb_domain. memcg->cgwb_domain is initialized on css online and destroyed on css release, wb->memcg_completions is added, and __wb_writeout_inc() is updated to increment completions against both global and memcg wb_domains. The following patches will update balance_dirty_pages() and its subroutines to actually consider memcg wb_domain for throttling. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + include/linux/memcontrol.h | 12 +++++++++++- include/linux/writeback.h | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 97a92fa0cdb5..8d470b73824f 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -118,6 +118,7 @@ struct bdi_writeback { #ifdef CONFIG_CGROUP_WRITEBACK struct percpu_ref refcnt; /* used only for !root wb's */ + struct fprop_local_percpu memcg_completions; struct cgroup_subsys_state *memcg_css; /* the associated memcg */ struct cgroup_subsys_state *blkcg_css; /* and blkcg */ struct list_head memcg_node; /* anchored at memcg->cgwb_list */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 662a953ea8ad..e3177bed23ea 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -389,8 +389,18 @@ enum { }; #ifdef CONFIG_CGROUP_WRITEBACK + struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); -#endif +struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) +{ + return NULL; +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b57c2786b5aa..04a3786c456f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -167,6 +167,9 @@ static inline void laptop_sync_completion(void) { } void throttle_vm_writeout(gfp_t gfp_mask); bool zone_dirty_ok(struct zone *zone); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); +#ifdef CONFIG_CGROUP_WRITEBACK +void wb_domain_exit(struct wb_domain *dom); +#endif extern struct wb_domain global_wb_domain; -- cgit v1.2.3 From 2529bb3aadc40a93e642f5f3650f63379a964467 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:34 -0400 Subject: writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes The amount of available memory to a memcg wb_domain can change as memcg configuration changes. A domain's ->dirty_limit exists to smooth out sudden drops in dirty threshold; however, when a domain's size actually drops significantly, it hinders the dirty throttling from adjusting to the new configuration leading to unexpected behaviors including unnecessary OOM kills. This patch resolves the issue by adding wb_domain_size_changed() which resets ->dirty_limit[_tstmp] and making memcg call it on configuration changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/writeback.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 04a3786c456f..3b73e97ecfc7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -132,6 +132,26 @@ struct wb_domain { unsigned long dirty_limit; }; +/** + * wb_domain_size_changed - memory available to a wb_domain has changed + * @dom: wb_domain of interest + * + * This function should be called when the amount of memory available to + * @dom has changed. It resets @dom's dirty limit parameters to prevent + * the past values which don't match the current configuration from skewing + * dirty throttling. Without this, when memory size of a wb_domain is + * greatly reduced, the dirty throttling logic may allow too many pages to + * be dirtied leading to consecutive unnecessary OOMs and may get stuck in + * that situation. + */ +static inline void wb_domain_size_changed(struct wb_domain *dom) +{ + spin_lock(&dom->lock); + dom->dirty_limit_tstamp = jiffies; + dom->dirty_limit = 0; + spin_unlock(&dom->lock); +} + /* * fs/fs-writeback.c */ -- cgit v1.2.3 From c2aa723a6093633ae4ec15b08a4db276643cab3e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 22 May 2015 18:23:35 -0400 Subject: writeback: implement memcg writeback domain based throttling While cgroup writeback support now connects memcg and blkcg so that writeback IOs are properly attributed and controlled, the IO back pressure propagation mechanism implemented in balance_dirty_pages() and its subroutines wasn't aware of cgroup writeback. Processes belonging to a memcg may have access to only subset of total memory available in the system and not factoring this into dirty throttling rendered it completely ineffective for processes under memcg limits and memcg ended up building a separate ad-hoc degenerate mechanism directly into vmscan code to limit page dirtying. The previous patches updated balance_dirty_pages() and its subroutines so that they can deal with multiple wb_domain's (writeback domains) and defined per-memcg wb_domain. Processes belonging to a non-root memcg are bound to two wb_domains, global wb_domain and memcg wb_domain, and should be throttled according to IO pressures from both domains. This patch updates dirty throttling code so that it repeats similar calculations for the two domains - the differences between the two are few and minor - and applies the lower of the two sets of resulting constraints. wb_over_bg_thresh(), which controls when background writeback terminates, is also updated to consider both global and memcg wb_domains. It returns true if dirty is over bg_thresh for either domain. This makes the dirty throttling mechanism operational for memcg domains including writeback-bandwidth-proportional dirty page distribution inside them but the ad-hoc memcg throttling mechanism in vmscan is still in place. The next patch will rip it out. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e3177bed23ea..c3eb19e2bc1c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -392,6 +392,8 @@ enum { struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, + unsigned long *pdirty, unsigned long *pwriteback); #else /* CONFIG_CGROUP_WRITEBACK */ @@ -400,6 +402,13 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) return NULL; } +static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, + unsigned long *pavail, + unsigned long *pdirty, + unsigned long *pwriteback) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -- cgit v1.2.3 From 21c6321fbb3a3787af07f1bc031d713a707fb69c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 May 2015 14:50:49 -0400 Subject: writeback: relocate wb[_try]_get(), wb_put(), inode_{attach|detach}_wb() Currently, majority of cgroup writeback support including all the above functions are implemented in include/linux/backing-dev.h and mm/backing-dev.c; however, the portion closely related to writeback logic implemented in include/linux/writeback.h and mm/page-writeback.c will expand to support foreign writeback detection and correction. This patch moves wb[_try]_get() and wb_put() to include/linux/backing-dev-defs.h so that they can be used from writeback.h and inode_{attach|detach}_wb() to writeback.h and page-writeback.c. This is pure reorganization and doesn't introduce any functional changes. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 50 ++++++++++++++++++++++++ include/linux/backing-dev.h | 82 ---------------------------------------- include/linux/writeback.h | 46 ++++++++++++++++++++++ 3 files changed, 96 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 8d470b73824f..e047b496a0b9 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -186,4 +186,54 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) set_wb_congested(bdi->wb.congested, sync); } +#ifdef CONFIG_CGROUP_WRITEBACK + +/** + * wb_tryget - try to increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + return percpu_ref_tryget(&wb->refcnt); + return true; +} + +/** + * wb_get - increment a wb's refcount + * @wb: bdi_writeback to get + */ +static inline void wb_get(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_get(&wb->refcnt); +} + +/** + * wb_put - decrement a wb's refcount + * @wb: bdi_writeback to put + */ +static inline void wb_put(struct bdi_writeback *wb) +{ + if (wb != &wb->bdi->wb) + percpu_ref_put(&wb->refcnt); +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline bool wb_tryget(struct bdi_writeback *wb) +{ + return true; +} + +static inline void wb_get(struct bdi_writeback *wb) +{ +} + +static inline void wb_put(struct bdi_writeback *wb) +{ +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + #endif /* __LINUX_BACKING_DEV_DEFS_H */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e9d7373f5f93..5c978a924157 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -243,7 +243,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested); struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp); -void __inode_attach_wb(struct inode *inode, struct page *page); void wb_memcg_offline(struct mem_cgroup *memcg); void wb_blkcg_offline(struct blkcg *blkcg); int inode_congested(struct inode *inode, int cong_bits); @@ -264,37 +263,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode) (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK); } -/** - * wb_tryget - try to increment a wb's refcount - * @wb: bdi_writeback to get - */ -static inline bool wb_tryget(struct bdi_writeback *wb) -{ - if (wb != &wb->bdi->wb) - return percpu_ref_tryget(&wb->refcnt); - return true; -} - -/** - * wb_get - increment a wb's refcount - * @wb: bdi_writeback to get - */ -static inline void wb_get(struct bdi_writeback *wb) -{ - if (wb != &wb->bdi->wb) - percpu_ref_get(&wb->refcnt); -} - -/** - * wb_put - decrement a wb's refcount - * @wb: bdi_writeback to put - */ -static inline void wb_put(struct bdi_writeback *wb) -{ - if (wb != &wb->bdi->wb) - percpu_ref_put(&wb->refcnt); -} - /** * wb_find_current - find wb for %current on a bdi * @bdi: bdi of interest @@ -353,35 +321,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return wb; } -/** - * inode_attach_wb - associate an inode with its wb - * @inode: inode of interest - * @page: page being dirtied (may be NULL) - * - * If @inode doesn't have its wb, associate it with the wb matching the - * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o - * @inode->i_lock. - */ -static inline void inode_attach_wb(struct inode *inode, struct page *page) -{ - if (!inode->i_wb) - __inode_attach_wb(inode, page); -} - -/** - * inode_detach_wb - disassociate an inode from its wb - * @inode: inode of interest - * - * @inode is being freed. Detach from its wb. - */ -static inline void inode_detach_wb(struct inode *inode) -{ - if (inode->i_wb) { - wb_put(inode->i_wb); - inode->i_wb = NULL; - } -} - /** * inode_to_wb - determine the wb of an inode * @inode: inode of interest @@ -471,19 +410,6 @@ static inline void wb_congested_put(struct bdi_writeback_congested *congested) { } -static inline bool wb_tryget(struct bdi_writeback *wb) -{ - return true; -} - -static inline void wb_get(struct bdi_writeback *wb) -{ -} - -static inline void wb_put(struct bdi_writeback *wb) -{ -} - static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) { return &bdi->wb; @@ -495,14 +421,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return &bdi->wb; } -static inline void inode_attach_wb(struct inode *inode, struct page *page) -{ -} - -static inline void inode_detach_wb(struct inode *inode) -{ -} - static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { return &inode_to_bdi(inode)->wb; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3b73e97ecfc7..6726b7e56beb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -8,6 +8,7 @@ #include #include #include +#include DECLARE_PER_CPU(int, dirty_throttle_leaks); @@ -173,6 +174,51 @@ static inline void wait_on_inode(struct inode *inode) wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); } +#ifdef CONFIG_CGROUP_WRITEBACK + +void __inode_attach_wb(struct inode *inode, struct page *page); + +/** + * inode_attach_wb - associate an inode with its wb + * @inode: inode of interest + * @page: page being dirtied (may be NULL) + * + * If @inode doesn't have its wb, associate it with the wb matching the + * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o + * @inode->i_lock. + */ +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ + if (!inode->i_wb) + __inode_attach_wb(inode, page); +} + +/** + * inode_detach_wb - disassociate an inode from its wb + * @inode: inode of interest + * + * @inode is being freed. Detach from its wb. + */ +static inline void inode_detach_wb(struct inode *inode) +{ + if (inode->i_wb) { + wb_put(inode->i_wb); + inode->i_wb = NULL; + } +} + +#else /* CONFIG_CGROUP_WRITEBACK */ + +static inline void inode_attach_wb(struct inode *inode, struct page *page) +{ +} + +static inline void inode_detach_wb(struct inode *inode) +{ +} + +#endif /* CONFIG_CGROUP_WRITEBACK */ + /* * mm/page-writeback.c */ -- cgit v1.2.3 From b16b1deb553adcd7b3b7ce3e6d6fd1b923f314da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Jun 2015 08:39:48 -0600 Subject: writeback: make writeback_control track the inode being written back Currently, for cgroup writeback, the IO submission paths directly associate the bio's with the blkcg from inode_to_wb_blkcg_css(); however, it'd be necessary to keep more writeback context to implement foreign inode writeback detection. wbc (writeback_control) is the natural fit for the extra context - it persists throughout the writeback of each inode and is passed all the way down to IO submission paths. This patch adds wbc_attach_and_unlock_inode(), wbc_detach_inode(), and wbc_attach_fdatawrite_inode() which are used to associate wbc with the inode being written back. IO submission paths now use wbc_init_bio() instead of directly associating bio's with blkcg themselves. This leaves inode_to_wb_blkcg_css() w/o any user. The function is removed. wbc currently only tracks the associated wb (bdi_writeback). Future patches will add more for foreign inode detection. The association is established under i_lock which will be depended upon when migrating foreign inodes to other wb's. As currently, once established, inode to wb association never changes, going through wbc when initializing bio's doesn't cause any behavior changes. v2: submit_blk_blkcg() now checks whether the wbc is associated with a wb before dereferencing it. This can happen when pageout() is writing pages directly without going through the usual writeback path. As pageout() path is single-threaded, we don't want it to be blocked behind a slow cgroup and ultimately want it to delegate actual writing to the usual writeback path. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 12 -------- include/linux/writeback.h | 68 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5c978a924157..b1d2489a6536 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -332,12 +332,6 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return inode->i_wb; } -static inline struct cgroup_subsys_state * -inode_to_wb_blkcg_css(struct inode *inode) -{ - return inode_to_wb(inode)->blkcg_css; -} - struct wb_iter { int start_blkcg_id; struct radix_tree_iter tree_iter; @@ -434,12 +428,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } -static inline struct cgroup_subsys_state * -inode_to_wb_blkcg_css(struct inode *inode) -{ - return blkcg_root_css; -} - struct wb_iter { int next_id; }; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 6726b7e56beb..8f964e558af5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -86,6 +86,9 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ +#ifdef CONFIG_CGROUP_WRITEBACK + struct bdi_writeback *wb; /* wb this writeback is issued under */ +#endif }; /* @@ -176,7 +179,14 @@ static inline void wait_on_inode(struct inode *inode) #ifdef CONFIG_CGROUP_WRITEBACK +#include +#include + void __inode_attach_wb(struct inode *inode, struct page *page); +void wbc_attach_and_unlock_inode(struct writeback_control *wbc, + struct inode *inode) + __releases(&inode->i_lock); +void wbc_detach_inode(struct writeback_control *wbc); /** * inode_attach_wb - associate an inode with its wb @@ -207,6 +217,44 @@ static inline void inode_detach_wb(struct inode *inode) } } +/** + * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite + * @wbc: writeback_control of interest + * @inode: target inode + * + * This function is to be used by __filemap_fdatawrite_range(), which is an + * alternative entry point into writeback code, and first ensures @inode is + * associated with a bdi_writeback and attaches it to @wbc. + */ +static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode) +{ + spin_lock(&inode->i_lock); + inode_attach_wb(inode, NULL); + wbc_attach_and_unlock_inode(wbc, inode); +} + +/** + * wbc_init_bio - writeback specific initializtion of bio + * @wbc: writeback_control for the writeback in progress + * @bio: bio to be initialized + * + * @bio is a part of the writeback in progress controlled by @wbc. Perform + * writeback specific initialization. This is used to apply the cgroup + * writeback context. + */ +static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) +{ + /* + * pageout() path doesn't attach @wbc to the inode being written + * out. This is intentional as we don't want the function to block + * behind a slow cgroup. Ultimately, we want pageout() to kick off + * regular writeback instead of writing things out itself. + */ + if (wbc->wb) + bio_associate_blkcg(bio, wbc->wb->blkcg_css); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct page *page) @@ -217,6 +265,26 @@ static inline void inode_detach_wb(struct inode *inode) { } +static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, + struct inode *inode) + __releases(&inode->i_lock) +{ + spin_unlock(&inode->i_lock); +} + +static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode) +{ +} + +static inline void wbc_detach_inode(struct writeback_control *wbc) +{ +} + +static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* -- cgit v1.2.3 From 2a81490811d0296d390c571bb64eaa93e5ed7def Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 May 2015 14:50:51 -0400 Subject: writeback: implement foreign cgroup inode detection As concurrent write sharing of an inode is expected to be very rare and memcg only tracks page ownership on first-use basis severely confining the usefulness of such sharing, cgroup writeback tracks ownership per-inode. While the support for concurrent write sharing of an inode is deemed unnecessary, an inode being written to by different cgroups at different points in time is a lot more common, and, more importantly, charging only by first-use can too readily lead to grossly incorrect behaviors (single foreign page can lead to gigabytes of writeback to be incorrectly attributed). To resolve this issue, cgroup writeback detects the majority dirtier of an inode and will transfer the ownership to it. To avoid unnnecessary oscillation, the detection mechanism keeps track of history and gives out the switch verdict only if the foreign usage pattern is stable over a certain amount of time and/or writeback attempts. The detection mechanism has fairly low space and computation overhead. It adds 8 bytes to struct inode (one int and two u16's) and minimal amount of calculation per IO. The detection mechanism converges to the correct answer usually in several seconds of IO time when there's a clear majority dirtier. Even when there isn't, it can reach an acceptable answer fairly quickly under most circumstances. Please see wb_detach_inode() for more details. This patch only implements detection. Following patches will implement actual switching. v2: wbc_account_io() now checks whether the wbc is associated with a wb before dereferencing it. This can happen when pageout() is writing pages directly without going through the usual writeback path. As pageout() path is single-threaded, we don't want it to be blocked behind a slow cgroup and ultimately want it to delegate actual writing to the usual writeback path. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/fs.h | 5 +++++ include/linux/writeback.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 67a42ec95065..740126d7c44e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -638,6 +638,11 @@ struct inode { struct list_head i_wb_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ + + /* foreign inode detection, see wbc_detach_inode() */ + int i_wb_frn_winner; + u16 i_wb_frn_avg_time; + u16 i_wb_frn_history; #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 8f964e558af5..b333c945e571 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -88,6 +88,15 @@ struct writeback_control { unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ + struct inode *inode; /* inode being written out */ + + /* foreign inode detection, see wbc_detach_inode() */ + int wb_id; /* current wb id */ + int wb_lcand_id; /* last foreign candidate wb id */ + int wb_tcand_id; /* this foreign candidate wb id */ + size_t wb_bytes; /* bytes written by current wb */ + size_t wb_lcand_bytes; /* bytes written by last candidate */ + size_t wb_tcand_bytes; /* bytes written by this candidate */ #endif }; @@ -187,6 +196,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); +void wbc_account_io(struct writeback_control *wbc, struct page *page, + size_t bytes); /** * inode_attach_wb - associate an inode with its wb @@ -285,6 +296,11 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { } +static inline void wbc_account_io(struct writeback_control *wbc, + struct page *page, size_t bytes) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* -- cgit v1.2.3 From 682aa8e1a6a1504a4caaa62e6c2c9daae3757210 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 May 2015 14:50:53 -0400 Subject: writeback: implement unlocked_inode_to_wb transaction and use it for stat updates The mechanism for detecting whether an inode should switch its wb (bdi_writeback) association is now in place. This patch build the framework for the actual switching. This patch adds a new inode flag I_WB_SWITCHING, which has two functions. First, the easy one, it ensures that there's only one switching in progress for a give inode. Second, it's used as a mechanism to synchronize wb stat updates. The two stats, WB_RECLAIMABLE and WB_WRITEBACK, aren't event counters but track the current number of dirty pages and pages under writeback respectively. As such, when an inode is moved from one wb to another, the inode's portion of those stats have to be transferred together; unfortunately, this is a bit tricky as those stat updates are percpu operations which are performed without holding any lock in some places. This patch solves the problem in a similar way as memcg. Each such lockless stat updates are wrapped in transaction surrounded by unlocked_inode_to_wb_begin/end(). During normal operation, they map to rcu_read_lock/unlock(); however, if I_WB_SWITCHING is asserted, mapping->tree_lock is grabbed across the transaction. In turn, the switching path sets I_WB_SWITCHING and waits for a RCU grace period to pass before actually starting to switch, which guarantees that all stat update paths are synchronizing against mapping->tree_lock. This patch still doesn't implement the actual switching. v3: Updated on top of the recent cancel_dirty_page() updates. unlocked_inode_to_wb_begin() now nests inside mem_cgroup_begin_page_stat() to match the locking order. v2: The i_wb access transaction will be used for !stat accesses too. Function names and comments updated accordingly. s/inode_wb_stat_unlocked_{begin|end}/unlocked_inode_to_wb_{begin|end}/ s/switch_wb/switch_wbs/ Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 54 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 6 +++++ include/linux/mm.h | 3 ++- 3 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b1d2489a6536..73ffa32e58ee 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -332,6 +332,50 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return inode->i_wb; } +/** + * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction + * @inode: target inode + * @lockedp: temp bool output param, to be passed to the end function + * + * The caller wants to access the wb associated with @inode but isn't + * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This + * function determines the wb associated with @inode and ensures that the + * association doesn't change until the transaction is finished with + * unlocked_inode_to_wb_end(). + * + * The caller must call unlocked_inode_to_wb_end() with *@lockdep + * afterwards and can't sleep during transaction. IRQ may or may not be + * disabled on return. + */ +static inline struct bdi_writeback * +unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +{ + rcu_read_lock(); + + /* + * Paired with store_release in inode_switch_wb_work_fn() and + * ensures that we see the new wb if we see cleared I_WB_SWITCH. + */ + *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + + if (unlikely(*lockedp)) + spin_lock_irq(&inode->i_mapping->tree_lock); + return inode_to_wb(inode); +} + +/** + * unlocked_inode_to_wb_end - end inode wb access transaction + * @inode: target inode + * @locked: *@lockedp from unlocked_inode_to_wb_begin() + */ +static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +{ + if (unlikely(locked)) + spin_unlock_irq(&inode->i_mapping->tree_lock); + + rcu_read_unlock(); +} + struct wb_iter { int start_blkcg_id; struct radix_tree_iter tree_iter; @@ -420,6 +464,16 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return &inode_to_bdi(inode)->wb; } +static inline struct bdi_writeback * +unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +{ + return inode_to_wb(inode); +} + +static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +{ +} + static inline void wb_memcg_offline(struct mem_cgroup *memcg) { } diff --git a/include/linux/fs.h b/include/linux/fs.h index 740126d7c44e..b5e1dcfbc5e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1815,6 +1815,11 @@ struct super_operations { * * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). * + * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to + * synchronize competing switching instances and to tell + * wb stat updates to grab mapping->tree_lock. See + * inode_switch_wb_work_fn() for details. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -1834,6 +1839,7 @@ struct super_operations { #define I_DIRTY_TIME (1 << 11) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_WB_SWITCH (1 << 13) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) diff --git a/include/linux/mm.h b/include/linux/mm.h index f48d979ced4b..4024543b4203 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,6 +27,7 @@ struct anon_vma_chain; struct file_ra_state; struct user_struct; struct writeback_control; +struct bdi_writeback; #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1214,7 +1215,7 @@ int redirty_page_for_writepage(struct writeback_control *wbc, void account_page_dirtied(struct page *page, struct address_space *mapping, struct mem_cgroup *memcg); void account_page_cleaned(struct page *page, struct address_space *mapping, - struct mem_cgroup *memcg); + struct mem_cgroup *memcg, struct bdi_writeback *wb); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void cancel_dirty_page(struct page *page); -- cgit v1.2.3 From aaa2cacf8184e2a92accb8e443b1608d65f9a13f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 May 2015 14:50:55 -0400 Subject: writeback: add lockdep annotation to inode_to_wb() With the previous three patches, all operations which acquire wb from inode are either under one of inode->i_lock, mapping->tree_lock or wb->list_lock or protected by unlocked_inode_to_wb transaction. This will be depended upon by foreign inode wb switching. This patch adds lockdep assertion to inode_to_wb() so that usages outside the above list locks can be caught easily. There are three exceptions. * locked_inode_to_wb_and_lock_list() is holding wb->list_lock but the wb may not be the inode's. Ensuring that is the function's role after all. Updated to deref inode->i_wb directly. * inode_wb_stat_unlocked_begin() is usually protected by combination of !I_WB_SWITCH and rcu_read_lock(). Updated to deref inode->i_wb directly. * inode_congested() wants to test whether inode->i_wb is set before starting the transaction. Added inode_to_wb_is_valid() which tests inode->i_wb directly. v5: might_lock() removed. It annotates that the lock is grabbed w/ irq enabled which isn't the case and triggering lockdep warning spuriously. v4: might_lock() added to unlocked_inode_to_wb_begin(). v3: inode_congested() conversion added. v2: locked_inode_to_wb_and_lock_list() was missing in the first version. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 73ffa32e58ee..dfce80869145 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -321,14 +321,34 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return wb; } +/** + * inode_to_wb_is_valid - test whether an inode has a wb associated + * @inode: inode of interest + * + * Returns %true if @inode has a wb associated. May be called without any + * locking. + */ +static inline bool inode_to_wb_is_valid(struct inode *inode) +{ + return inode->i_wb; +} + /** * inode_to_wb - determine the wb of an inode * @inode: inode of interest * - * Returns the wb @inode is currently associated with. + * Returns the wb @inode is currently associated with. The caller must be + * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the + * associated wb's list_lock. */ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(debug_locks && + (!lockdep_is_held(&inode->i_lock) && + !lockdep_is_held(&inode->i_mapping->tree_lock) && + !lockdep_is_held(&inode->i_wb->list_lock))); +#endif return inode->i_wb; } @@ -360,7 +380,12 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) if (unlikely(*lockedp)) spin_lock_irq(&inode->i_mapping->tree_lock); - return inode_to_wb(inode); + + /* + * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. + * inode_to_wb() will bark. Deref directly. + */ + return inode->i_wb; } /** @@ -459,6 +484,11 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) return &bdi->wb; } +static inline bool inode_to_wb_is_valid(struct inode *inode) +{ + return true; +} + static inline struct bdi_writeback *inode_to_wb(struct inode *inode) { return &inode_to_bdi(inode)->wb; -- cgit v1.2.3 From e8a7abf5a5bd302a1e06a3c21a629eaa4cba57d6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 28 May 2015 14:50:57 -0400 Subject: writeback: disassociate inodes from dying bdi_writebacks For the purpose of foreign inode detection, wb's (bdi_writeback's) are identified by the associated memcg ID. As we create a separate wb for each memcg, this is enough to identify the active wb's; however, when blkcg is enabled or disabled higher up in the hierarchy, the mapping between memcg and blkcg changes which in turn creates a new wb to service the new mapping. The old wb is unlinked from index and released after all references are drained. The foreign inode detection logic can't detect this condition because both the old and new wb's point to the same memcg and thus never decides to move inodes attached to the old wb to the new one. This patch adds logic to initiate switching immediately in wbc_attach_and_unlock_inode() if the associated wb is dying. We can make the usual foreign detection logic to distinguish the different wb's mapped to the memcg but the dying wb is never gonna be in active service again and there's no point in tracking the usage history and reaching the switch verdict after enough data points are collected. It's already known that the wb has to be switched. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e047b496a0b9..a48d90e3bcbb 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -219,6 +219,17 @@ static inline void wb_put(struct bdi_writeback *wb) percpu_ref_put(&wb->refcnt); } +/** + * wb_dying - is a wb dying? + * @wb: bdi_writeback of interest + * + * Returns whether @wb is unlinked and being drained. + */ +static inline bool wb_dying(struct bdi_writeback *wb) +{ + return percpu_ref_is_dying(&wb->refcnt); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool wb_tryget(struct bdi_writeback *wb) @@ -234,6 +245,11 @@ static inline void wb_put(struct bdi_writeback *wb) { } +static inline bool wb_dying(struct bdi_writeback *wb) +{ + return false; +} + #endif /* CONFIG_CGROUP_WRITEBACK */ #endif /* __LINUX_BACKING_DEV_DEFS_H */ -- cgit v1.2.3 From be3ef76e9d9b97962c70bd6351787d29071ae481 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 2 Jun 2015 14:30:11 +0200 Subject: clockevents: Rename state to state_use_accessors The only sensible way to make abuse of core internal fields obvious and easy to grep for. Signed-off-by: Thomas Gleixner Cc: Viresh Kumar Cc: Peter Zijlstra --- include/linux/clockchips.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 64214ad85af9..597a1e836f22 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -87,7 +87,7 @@ enum clock_event_state { * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE - * @state: current state of the device, assigned by the core code + * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. @@ -117,7 +117,7 @@ struct clock_event_device { u32 mult; u32 shift; enum clock_event_mode mode; - enum clock_event_state state; + enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; @@ -152,27 +152,27 @@ struct clock_event_device { /* Helpers to verify state of a clockevent device */ static inline bool clockevent_state_detached(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_DETACHED; + return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; } static inline bool clockevent_state_shutdown(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_SHUTDOWN; + return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; } static inline bool clockevent_state_periodic(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_PERIODIC; + return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; } static inline bool clockevent_state_oneshot(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_ONESHOT; + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; } static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) { - return dev->state == CLOCK_EVT_STATE_ONESHOT_STOPPED; + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; } /* -- cgit v1.2.3 From 24bbd929e6b9e62afd263c42b4318d3b603c956c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Jun 2015 13:40:31 +0200 Subject: of/fdt: split off FDT self reservation from memreserve processing This splits off the reservation of the memory occupied by the FDT binary itself from the processing of the memory reservations it contains. This is necessary because the physical address of the FDT, which is needed to perform the reservation, may not be known to the FDT driver core, i.e., it may be mapped outside the linear direct mapping, in which case __pa() returns a bogus value. Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Rob Herring Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/of_fdt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 587ee507965d..fd627a58068f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -64,6 +64,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_fdt_scan_reserved_mem(void); +extern void early_init_fdt_reserve_self(void); extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool no_map); @@ -91,6 +92,7 @@ extern u64 fdt_translate_address(const void *blob, int node_offset); extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ static inline void early_init_fdt_scan_reserved_mem(void) {} +static inline void early_init_fdt_reserve_self(void) {} static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } static inline void unflatten_device_tree(void) {} static inline void unflatten_and_copy_device_tree(void) {} -- cgit v1.2.3 From 632dda833e28fe43049a01b4bc53e409176e7843 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:04:50 -0400 Subject: SUNRPC: Clean up bc_send() Clean up: Merge bc_send() into bc_svc_process(). Note: even thought this touches svc.c, it is a client-side change. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use -- cgit v1.2.3 From 1e25aa9641e8f3fa39cd5e46b4afcafd7f12a44b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 1 Jun 2015 16:36:27 -0700 Subject: hid-sensor: Fix suspend/resume delay By default all the sensors are runtime suspended state (lowest power state). During Linux suspend process, all the run time suspended devices are resumed and then suspended. This caused all sensors to power up and introduced delay in suspend time, when we introduced runtime PM for HID sensors. The opposite process happens during resume process. To fix this, we do powerup process of the sensors only when the request is issued from user (raw or tiggerred). In this way when runtime, resume calls for powerup it will simply return as this will not match user requested state. Note this is a regression fix as the increase in suspend / resume times can be substantial (report of 8 seconds on Len's laptop!) Signed-off-by: Srinivas Pandruvada Tested-by: Len Brown Cc: Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..cd224dfd94d8 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -230,6 +230,7 @@ struct hid_sensor_common { struct platform_device *pdev; unsigned usage_id; atomic_t data_ready; + atomic_t user_requested_state; struct iio_trigger *trigger; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; -- cgit v1.2.3 From cfaed10d1f27d036b72bbdc6b1e59ea28c38ec7f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 1 Jun 2015 11:15:25 -0500 Subject: scatterlist: introduce sg_nents_for_len When performing a dma_map_sg() call, the number of sg entries to map is required. Using sg_nents to retrieve the number of sg entries will return the total number of entries in the sg list up to the entry marked as the end. If there happen to be unused entries in the list, these will still be counted. Some dma_map_sg() implementations will not handle the unused entries correctly (lib/swiotlb.c) and execute a BUG_ON. The sg_nents_for_len() function will traverse the sg list and return the number of entries required to satisfy the supplied length argument. This can then be supplied to the dma_map_sg() call to successfully map the sg. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- include/linux/scatterlist.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index ed8f9e70df9b..a0edb992c9c3 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -221,6 +221,7 @@ static inline void *sg_virt(struct scatterlist *sg) } int sg_nents(struct scatterlist *sg); +int sg_nents_for_len(struct scatterlist *sg, u64 len); struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); -- cgit v1.2.3 From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 2 Jun 2015 19:01:38 +1000 Subject: x86/mm: Decouple from Nothing in uses anything from , so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include explicitly and relied on the implicit inclusion via . Also add: - to - to ... which were two other implicit header file dependencies. Suggested-by: David Miller Signed-off-by: Stephen Rothwell [ Tidied up the changelog. ] Acked-by: David Miller Acked-by: Takashi Iwai Acked-by: Viresh Kumar Acked-by: Vinod Koul Cc: Andrew Morton Cc: Anton Vorontsov Cc: Boris Ostrovsky Cc: Colin Cross Cc: David Vrabel Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: James E.J. Bottomley Cc: Jaroslav Kysela Cc: K. Y. Srinivasan Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Suma Ramars Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Ingo Molnar --- include/linux/io.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 04cce4da3685..fb5a99800e77 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -19,6 +19,7 @@ #define _LINUX_IO_H #include +#include #include #include -- cgit v1.2.3 From da7049f834c3582c1ed1a04889bda5b4121973c0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 13:49:07 -0400 Subject: svcrdma: Remove svc_rdma_xdr_decode_deferred_req() svc_rdma_xdr_decode_deferred_req() indexes an array with an un-byte-swapped value off the wire. Fortunately this function isn't used anywhere, so simply remove it. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index df8edf8ec914..8ad9b6d9d4e0 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,7 +182,6 @@ struct svcxprt_rdma { /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); -extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode, u32 *); -- cgit v1.2.3 From e842f2903908934187af7232fb5b21da527d1757 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Jun 2015 09:18:18 +1000 Subject: dax: don't abuse get_block mapping for endio callbacks dax_fault() currently relies on the get_block callback to attach an io completion callback to the mapping buffer head so that it can run unwritten extent conversion after zeroing allocated blocks. Instead of this hack, pass the conversion callback directly into dax_fault() similar to the get_block callback. When the filesystem allocates unwritten extents, it will set the buffer_unwritten() flag, and hence the dax_fault code can call the completion function in the contexts where it is necessary without overloading the mapping buffer head. Note: The changes to ext4 to use this interface are suspect at best. In fact, the way ext4 did this end_io assignment in the first place looks suspect because it only set a completion callback when there wasn't already some other write() call taking place on the same inode. The ext4 end_io code looks rather intricate and fragile with all it's reference counting and passing to different contexts for modification via inode private pointers that aren't protected by locks... Signed-off-by: Dave Chinner Acked-by: Jan Kara Signed-off-by: Dave Chinner --- include/linux/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..c9b4cca9e08d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); +typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 @@ -2627,9 +2628,10 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, -- cgit v1.2.3 From ce5c5d554dc47a4fb4360c84b72231fea081e7a0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Jun 2015 09:18:18 +1000 Subject: dax: expose __dax_fault for filesystems with locking constraints Some filesystems cannot call dax_fault() directly because they have different locking and/or allocation constraints in the page fault IO path. To handle this, we need to follow the same model as the generic block_page_mkwrite code, where the internals are exposed via __block_page_mkwrite() so that filesystems can wrap the correct locking and operations around the outside. This is loosely based on a patch originally from Matthew Willcox. Unlike the original patch, it does not change ext4 code, error returns or unwritten extent conversion handling. It also adds a __dax_mkwrite() wrapper for .page_mkwrite implementations to do the right thing, too. Signed-off-by: Dave Chinner Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- include/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c9b4cca9e08d..5784377e7c56 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2630,8 +2630,11 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, dax_iodone_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, -- cgit v1.2.3 From c8fff7bc5bba6bd59cad40441c189c4efe7190f6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 8 Apr 2015 19:59:20 +0300 Subject: of: return NUMA_NO_NODE from fallback of_node_to_nid() Node 0 might be offline as well as any other numa node, in this case kernel cannot handle memory allocation and crashes. Signed-off-by: Konstantin Khlebnikov Fixes: 0c3f061c195c ("of: implement of_node_to_nid as a weak function") Signed-off-by: Grant Likely --- include/linux/of.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6d2083..ab071742c0c4 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -667,7 +667,10 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else -static inline int of_node_to_nid(struct device_node *device) { return 0; } +static inline int of_node_to_nid(struct device_node *device) +{ + return NUMA_NO_NODE; +} #endif static inline struct device_node *of_find_matching_node( -- cgit v1.2.3 From 12f7c14aa602f15ad60e5a9da459271f63b92917 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 4 Jun 2015 00:01:21 +0900 Subject: crypto: doc - Fix typo in crypto-API.xml This patch fix some typos found in crypto-API.xml. It is because the file is generated from comments in sources, so I had to fix typo in sources. Signed-off-by: Masanari Iida Acked-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7d290a91c6f9..25a4b71d6d1f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -451,7 +451,7 @@ struct compress_alg { * transformation algorithm. * @cra_type: Type of the cryptographic transformation. This is a pointer to * struct crypto_type, which implements callbacks common for all - * trasnformation types. There are multiple options: + * transformation types. There are multiple options: * &crypto_blkcipher_type, &crypto_ablkcipher_type, * &crypto_ahash_type, &crypto_aead_type, &crypto_rng_type. * This field might be empty. In that case, there are no common -- cgit v1.2.3 From 64d6067057d9658acb8675afcfba549abdb7fc16 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 May 2015 11:36:11 +0200 Subject: KVM: x86: stubs for SMM support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the interface between x86.c and the emulator: the SMBASE register, a new emulator flag, the RSM instruction. It also adds a new request bit that will be used by the KVM_SMI ioctl. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a8bcbc9c6078..b019fee6d941 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_ENABLE_IBS 23 #define KVM_REQ_DISABLE_IBS 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_SMI 26 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 9e7c8f8c62c1e1cda203b5bfaba4575b141e42e7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2015 16:22:16 -0400 Subject: signals: don't abuse __flush_signals() in selinux_bprm_committed_creds() selinux_bprm_committed_creds()->__flush_signals() is not right, we shouldn't clear TIF_SIGPENDING unconditionally. There can be other reasons for signal_pending(): freezing(), JOBCTL_PENDING_MASK, and potentially more. Also change this code to check fatal_signal_pending() rather than SIGNAL_GROUP_EXIT, it looks a bit better. Now we can kill __flush_signals() before it finds another buggy user. Note: this code looks racy, we can flush a signal which was sent after the task SID has been updated. Signed-off-by: Oleg Nesterov Signed-off-by: Paul Moore --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8222ae40ecb0..4f84aade8b4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2373,7 +2373,6 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); -extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -- cgit v1.2.3 From 30b7e246a6222f1fbad39b1451273375306fe1e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:10 -0400 Subject: svcrdma: Keep rpcrdma_msg fields in network byte-order Fields in struct rpcrdma_msg are __be32. Don't byte-swap these fields when decoding RPC calls and then swap them back for the reply. For the most part, they can be left alone. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 8ad9b6d9d4e0..c03ca0a1b743 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -184,7 +184,7 @@ struct svcxprt_rdma { extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode, u32 *); + enum rpcrdma_errcode, __be32 *); extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, -- cgit v1.2.3 From b7e0b9a965a116341b4ef86ab98ea2843b218271 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:20 -0400 Subject: svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL At the 2015 LSF/MM, it was requested that memory allocation call sites that request GFP_KERNEL allocations in a loop should be annotated with __GFP_NOFAIL. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c03ca0a1b743..d26384b22126 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -211,7 +211,6 @@ extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode); -struct page *svc_rdma_get_page(void); extern int svc_rdma_post_recv(struct svcxprt_rdma *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); -- cgit v1.2.3 From 0380a3f37540ad0582b3c749a74fc127af914689 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 4 Jun 2015 11:21:32 -0400 Subject: svcrdma: Add a separate "max data segs macro for svcrdma The server and client maximum are architecturally independent. Allow changing one without affecting the other. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d26384b22126..cb94ee4181d4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,6 +172,13 @@ struct svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 +#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ +#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD +#else +#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) +#endif + #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ -- cgit v1.2.3 From 42aecaa9bb2bd57eb8d61b4565cee5d3640863fb Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:39 -0700 Subject: net: Get skb hash over flow_keys structure This patch changes flow hashing to use jhash2 over the flow_keys structure instead just doing jhash_3words over src, dst, and ports. This method will allow us take more input into the hashing function so that we can include full IPv6 addresses, VLAN, flow labels etc. without needing to resort to xor'ing which makes for a poor hash. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b41c15efa27..cc612fc0a894 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; else if (skb_flow_dissect_flow_keys(skb, &keys)) - skb_set_transport_header(skb, keys.basic.thoff); + skb_set_transport_header(skb, keys.control.thoff); else skb_set_transport_header(skb, offset_hint); } -- cgit v1.2.3 From 01949d0109ee5fae33752f0db99a36f1619e1873 Mon Sep 17 00:00:00 2001 From: Haggai Abramonvsky Date: Thu, 4 Jun 2015 19:30:38 +0300 Subject: net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0 When working in ISSI > 0 mode, the model exposed by the device for XRCs and SRQs is different. XRCs use XRC SRQs and plain SRQs are based on RPM (Receive Memory Pool). Add helper functions to create, modify, query, and arm XRC SRQs and RMPs. Signed-off-by: Haggai Abramovsky Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 6 +++++- include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7fa26f03acc1..ba9f212c94bb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -339,6 +339,8 @@ struct mlx5_core_mr { enum mlx5_res_type { MLX5_RES_QP, + MLX5_RES_SRQ, + MLX5_RES_XSRQ, }; struct mlx5_core_rsc_common { @@ -348,6 +350,7 @@ struct mlx5_core_rsc_common { }; struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; int max_gs; @@ -640,7 +643,8 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen); + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b27e9f6e090a..dbe2b32c0539 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2022,12 +2022,9 @@ struct mlx5_ifc_srqc_bits { u8 reserved_9[0x40]; - u8 db_record_addr_h[0x20]; - - u8 db_record_addr_l[0x1e]; - u8 reserved_10[0x2]; + u8 dbr_addr[0x40]; - u8 reserved_11[0x80]; + u8 reserved_10[0x80]; }; enum { @@ -4167,6 +4164,13 @@ struct mlx5_ifc_modify_rmp_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_rmp_bitmask_bits { + u8 reserved[0x20]; + + u8 reserved1[0x1f]; + u8 lwm[0x1]; +}; + struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4180,7 +4184,7 @@ struct mlx5_ifc_modify_rmp_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_rmp_bitmask_bits bitmask; u8 reserved_4[0x40]; -- cgit v1.2.3 From d18a9470f89727f870db944a36223bf1bb15bdc1 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:40 +0300 Subject: net/mlx5_core: Make the vport helpers available for the IB driver too Move the vport header file to be under include/linux/mlx5, such that the mlx5 IB can use it as well. Also add nic_ prefix to the vport NIC commands to differeniate between HCA vport commands and NIC vport commands. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/mlx5/vport.h (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h new file mode 100644 index 000000000000..99d0e9f85432 --- /dev/null +++ b/include/linux/mlx5/vport.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_VPORT_H__ +#define __MLX5_VPORT_H__ + +#include + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); + +#endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 707c4602cda6624940761b66a4119f1909492385 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:41 +0300 Subject: net/mlx5_core: Add new query HCA vport commands Added the implementation for the following commands: 1. QUERY_HCA_VPORT_GID 2. QUERY_HCA_VPORT_PKEY 3. QUERY_HCA_VPORT_CONTEXT They will be needed when we move to work with ISSI > 0 in the IB driver too. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 13 +++++++++++++ include/linux/mlx5/driver.h | 45 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 23 ++++++++++++++-------- include/linux/mlx5/vport.h | 14 ++++++++++++++ 4 files changed, 87 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b288c538347a..b2c43508a737 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -99,6 +99,12 @@ __mlx5_mask(typ, fld)) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) +#define MLX5_GET64_PR(typ, p, fld) ({ \ + u64 ___t = MLX5_GET64(typ, p, fld); \ + pr_debug(#fld " = 0x%llx\n", ___t); \ + ___t; \ +}) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, @@ -1172,4 +1178,11 @@ enum { MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; +static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) +{ + if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) + return 0; + return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; +} + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ba9f212c94bb..8ab8b8af5c32 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -553,6 +553,41 @@ struct mlx5_pas { u8 log_sz; }; +enum port_state_policy { + MLX5_AAA_000 +}; + +enum phy_port_state { + MLX5_AAA_111 +}; + +struct mlx5_hca_vport_context { + u32 field_select; + bool sm_virt_aware; + bool has_smi; + bool has_raw; + enum port_state_policy policy; + enum phy_port_state phys_state; + enum ib_port_state vport_state; + u8 port_physical_state; + u64 sys_image_guid; + u64 port_guid; + u64 node_guid; + u32 cap_mask1; + u32 cap_mask1_perm; + u32 cap_mask2; + u32 cap_mask2_perm; + u16 lid; + u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */ + u8 lmc; + u8 subnet_timeout; + u16 sm_lid; + u8 sm_sl; + u16 qkey_violation_counter; + u16 pkey_violation_counter; + bool grh_required; +}; + static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { return buf->direct.buf + offset; @@ -792,4 +827,14 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +static inline int mlx5_get_gid_table_len(u16 param) +{ + if (param > 4) { + pr_warn("gid table length is zero\n"); + return 0; + } + + return 8 * (1 << param); +} + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dbe2b32c0539..f06d054ad021 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2221,12 +2221,15 @@ struct mlx5_ifc_hca_vport_context_bits { u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; - u8 reserved_1[0x10]; - u8 port_state_policy[0x4]; - u8 phy_port_state[0x4]; + u8 reserved_1[0xc]; + u8 port_physical_state[0x4]; + u8 vport_state_policy[0x4]; + u8 port_state[0x4]; u8 vport_state[0x4]; - u8 reserved_2[0x60]; + u8 reserved_2[0x20]; + + u8 system_image_guid[0x40]; u8 port_guid[0x40]; @@ -3490,7 +3493,8 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3519,7 +3523,8 @@ struct mlx5_ifc_query_hca_vport_gid_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3545,7 +3550,8 @@ struct mlx5_ifc_query_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; @@ -4243,7 +4249,8 @@ struct mlx5_ifc_modify_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 99d0e9f85432..67882a834efb 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -37,5 +37,19 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid); +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey); +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep); +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + __be64 *sys_image_guid); +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 211e6c80e5a68ef39a81484583e8efbf9774627d Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:42 +0300 Subject: net/mlx5_core: Get vendor-id using the query adapter command Add two wrapper functions to the query adapter command: 1. mlx5_query_board_id -- replaces the old mlx5_cmd_query_adapter. 2. mlx5_core_query_vendor_id -- retrieves the vendor_id from the query_adapter command. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8ab8b8af5c32..b90fb9336d21 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -817,6 +817,7 @@ struct mlx5_interface { void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); struct mlx5_profile { u64 mask; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f06d054ad021..6d2f6fee041c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2470,9 +2470,12 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { }; struct mlx5_ifc_query_adapter_param_block_bits { - u8 reserved_0[0xe0]; + u8 reserved_0[0xc0]; - u8 reserved_1[0x10]; + u8 reserved_1[0x8]; + u8 ieee_vendor_id[0x18]; + + u8 reserved_2[0x10]; u8 vsd_vendor_id[0x10]; u8 vsd[208][0x8]; -- cgit v1.2.3 From e760152d08da78aa160e68ac90bf8f3f10aff462 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:43 +0300 Subject: net/mlx5_core: Use port number in the query port mtu helpers Extend the function prototypes for max and operational mtu to take the local port number. In the Ethernet driver is this hard coded to one, since ConnectX4 Ethernet devices are always function-per-port. The IB driver also serves older devices (ConnectIB) which isn't such, and hence the part can vary. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b90fb9336d21..cd09784b6999 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -751,8 +751,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 local_port); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From a05bdefa4081d43f9c86c3bb693d0492a21590da Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:44 +0300 Subject: net/mlx5_core: Use port number when querying port ptys Until now, mlx5_query_port_ptys always queried port number one. Added new argument in the function's prototype so we can also query the second port. This will be needed when thr helper will be invoked from the IB driver on non FPP (Function-Per-Port) devices. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd09784b6999..e4b814f64014 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -739,7 +739,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask); + int ptys_size, int proto_mask, u8 local_port); int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, -- cgit v1.2.3 From a124d13ef59e09941fc0924fd7c29ae6d7cd77a3 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:45 +0300 Subject: net/mlx5_core: Add more query port helpers Add the following helpers: 1. mlx5_query_port_proto_oper -- queries the port speed port mask 2. mlx5_query_port_link_width_oper - queries the port link with bitmask 3. mlx5_query_port_vl_hw_cap - queries the Virtual Lanes supported on this port These helpers will be used from the IB driver when working in ISSI > 0 mode. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e4b814f64014..6093bde16b94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_PUDE = 0x5009, MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, + MLX5_REG_PVLC = 0x500f, MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, @@ -744,6 +745,11 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port); +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); int mlx5_set_port_status(struct mlx5_core_dev *dev, @@ -755,6 +761,8 @@ int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 local_port); int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, u8 local_port); +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 03fbf488cece461468d3abb795f5e5f055e00040 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 4 Jun 2015 16:55:10 +0300 Subject: spi: pxa2xx: Differentiate Intel LPSS types Intel LPSS SPI properties differ between between platforms. Now private registers offset 0x400 or 0x800 is autodetected but there is need to support also other offset and handle a few other differences. Prepare for that by splitting the LPSS_SSP type into compatible hardware types and set it now based on PCI or ACPI ID. That type will be used to set properties that differ between current and upcoming platforms. Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index dab545bb66b3..95a4b3bd7a5c 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -194,8 +194,9 @@ enum pxa_ssp_type { PXA168_SSP, PXA910_SSP, CE4100_SSP, - LPSS_SSP, QUARK_X1000_SSP, + LPSS_LPT_SSP, + LPSS_BYT_SSP, }; struct ssp_device { -- cgit v1.2.3 From dccf7369652f3934456345aab6a92fa905177886 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 4 Jun 2015 16:55:11 +0300 Subject: spi: pxa2xx: Prepare for new Intel LPSS SPI type Some of the Intel LPSS SPI properties will be different in upcoming platforms compared to existing Lynxpoint and BayTrail/Braswell. LPSS SPI private registers will be at different offset and there will be changes in individual registers and default FIFO thresholds too. Add configuration for these differences and use them in runtime based on LPSS SSP type. With this change private registers offset autodetection becomes needless. Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 95a4b3bd7a5c..0485bab061fd 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -195,7 +195,7 @@ enum pxa_ssp_type { PXA910_SSP, CE4100_SSP, QUARK_X1000_SSP, - LPSS_LPT_SSP, + LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, }; -- cgit v1.2.3 From 0f41979164a52a1ca0f0a601f90000fc18e3a396 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 22:59:08 -0400 Subject: SUNRPC: Remove unused argument 'tk_ops' in rpc_run_bc_task Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..2aa68976a482 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); -- cgit v1.2.3 From 0d2a970d0ae55086520e1e58e572a7acd519429c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 15:37:10 -0400 Subject: SUNRPC: Fix a backchannel race We need to allow the server to send a new request immediately after we've replied to the previous one. Right now, there is a window between the send and the release of the old request in rpc_put_task(), where the server could send us a new backchannel RPC call, and we have no request to service it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..bc9c39d6d30d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -212,7 +212,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated -- cgit v1.2.3 From 8e73485c7959fd25650761eab04db1e72ea14c23 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 13:58:53 +0200 Subject: KVM: add vcpu-specific functions to read/write/translate GFNs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to hide SMRAM from guests not running in SMM. Therefore, all uses of kvm_read_guest* and kvm_write_guest* must be changed to use different address spaces, depending on whether the VCPU is in system management mode. We need to introduce a new family of functions for this purpose. For now, the VCPU-based functions have the same behavior as the existing per-VM ones, they just accept a different type for the first argument. Later however they will be changed to use one of many "struct kvm_memslots" stored in struct kvm, through an architecture hook. VM-based functions will unconditionally use the first memslots pointer. Whenever possible, this patch introduces slot-based functions with an __ prefix, with two wrappers for generic and vcpu-based actions. The exceptions are kvm_read_guest and kvm_write_guest, which are copied into the new functions kvm_vcpu_read_guest and kvm_vcpu_write_guest. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b019fee6d941..ba1ea43998e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -471,6 +471,11 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) || lockdep_is_held(&kvm->slots_lock)); } +static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) +{ + return kvm_memslots(vcpu->kvm); +} + static inline struct kvm_memory_slot * id_to_memslot(struct kvm_memslots *slots, int id) { @@ -576,6 +581,25 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); +struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); +pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); +pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); +struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); +unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); +unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); +int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, + int len); +int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, + unsigned long len); +int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, + unsigned long len); +int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, + int offset, int len); +int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, + unsigned long len); +void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -- cgit v1.2.3 From f481b069e674378758c73761827e83ab05c46b52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 17:30:37 +0200 Subject: KVM: implement multiple address spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only two ioctls have to be modified; the address space id is placed in the higher 16 bits of their slot id argument. As of this patch, no architecture defines more than one address space; x86 will be the first. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ba1ea43998e4..9564fd78c547 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -44,6 +44,10 @@ /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 +#ifndef KVM_ADDRESS_SPACE_NUM +#define KVM_ADDRESS_SPACE_NUM 1 +#endif + /* * For the normal pfn, the highest 12 bits should be zero, * so we can mask bit 62 ~ bit 52 to indicate the error pfn, @@ -331,6 +335,13 @@ struct kvm_kernel_irq_routing_entry { #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #endif +#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif + /* * Note: * memslots are not sorted by id anymore, please use id_to_memslot() @@ -349,7 +360,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots; + struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; struct srcu_struct srcu; struct srcu_struct irq_srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE @@ -464,16 +475,23 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - return rcu_dereference_check(kvm->memslots, + return rcu_dereference_check(kvm->memslots[as_id], srcu_read_lock_held(&kvm->srcu) || lockdep_is_held(&kvm->slots_lock)); } +static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +{ + return __kvm_memslots(kvm, 0); +} + static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) { - return kvm_memslots(vcpu->kvm); + int as_id = kvm_arch_vcpu_memslots_id(vcpu); + + return __kvm_memslots(vcpu->kvm, as_id); } static inline struct kvm_memory_slot * -- cgit v1.2.3 From 3f21c265cd5f7ae867cc0e86a1f6d5093f1963cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Jun 2015 10:57:37 -0600 Subject: block: add blk_set_queue_dying() to blkdev.h We export this function and NVMe wants to use it, but for some reason it was never added to the block header. Do that. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ccaa9aecd593..a31380c35918 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1024,6 +1024,7 @@ bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +extern void blk_set_queue_dying(struct request_queue *); /* * block layer runtime pm functions -- cgit v1.2.3 From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 14:28:14 -0600 Subject: NVMe: Automatic namespace rescan Namespaces may be dynamically allocated and deleted or attached and detached. This has the driver rescan the device for namespace changes after each device reset or namespace change asynchronous event. There could potentially be many detached namespaces that we don't want polluting /dev/ with unusable block handles, so this will delete disks if the namespace is not active as indicated by the response from identify namespace. This also skips adding the disk if no capacity is provisioned to the namespace in the first place. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 986bf8ad8e93..c0d94ed8ce9a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; -- cgit v1.2.3 From 2e61dfb3602b904966491f260f62c01b9895936a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 5 Jun 2015 11:26:13 -0500 Subject: clk: of: helper for filling parent clock array and return num of parents Sprinkled all through the platform clock drivers are code like this to fill the clock parent array: for (i = 0; i < num_parents; ++i) parent_names[i] = of_clk_get_parent_name(np, i); The of_clk_parent_fill() will do the same as the code above, and while at it, return the number of parents as well since the logic of the function is to the walk the clock node to look for the parent. Signed-off-by: Dinh Nguyen [sboyd@codeaurora.org: Fixed kernel-doc] Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5378c2aba4d2..2e5df069ca34 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -626,6 +626,8 @@ struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data); int of_clk_get_parent_count(struct device_node *np); +int of_clk_parent_fill(struct device_node *np, const char **parents, + unsigned int size); const char *of_clk_get_parent_name(struct device_node *np, int index); void of_clk_init(const struct of_device_id *matches); -- cgit v1.2.3 From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Jun 2015 10:11:54 -0700 Subject: bpf: allow programs to write to certain skb fields allow programs read/write skb->mark, tc_index fields and ((struct qdisc_skb_cb *)cb)->data. mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call() which can be seen in sockex3_kern.c example. All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs. mark, tc_index are writeable from tc_cls_act only. cb[0]-cb[4] are writeable by both sockets and tc_cls_act. Add verifier tests and improve sample code. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca854e5bb2f7..2235aee8096a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -105,7 +105,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); - u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, struct bpf_insn *insn); }; -- cgit v1.2.3 From 4eaca0a887eaee04fc7a3866d0f5b51b34030dfa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Jun 2015 17:39:08 +0200 Subject: preempt: Use preempt_schedule_context() as the official tracing preemption point preempt_schedule_context() is a tracing safe preemption point but it's only used when CONFIG_CONTEXT_TRACKING=y. Other configs have tracing recursion issues since commit: b30f0e3ffedf ("sched/preempt: Optimize preemption operations on __schedule() callers") introduced function based preemp_count_*() ops. Lets make it available on all configs and give it a more appropriate name for its new position. Reported-by: Fengguang Wu Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433432349-1021-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index a1a00e14c14f..7686dd63bc35 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -204,15 +204,11 @@ do { \ #ifdef CONFIG_PREEMPT -#ifndef CONFIG_CONTEXT_TRACKING -#define __preempt_schedule_context() __preempt_schedule() -#endif - #define preempt_enable_notrace() \ do { \ barrier(); \ if (unlikely(__preempt_count_dec_and_test())) \ - __preempt_schedule_context(); \ + __preempt_schedule_notrace(); \ } while (0) #else #define preempt_enable_notrace() \ -- cgit v1.2.3 From 9a92e3dc6ad02208a014d0d8404ebbd697e3d5ef Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Jun 2015 17:39:09 +0200 Subject: preempt: Reorganize the notrace definitions a bit preempt.h has two seperate "#ifdef CONFIG_PREEMPT" sections: one to define preempt_enable() and another to define preempt_enable_notrace(). Lets gather both. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Fengguang Wu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1433432349-1021-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7686dd63bc35..0f1534acaf60 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -175,48 +175,46 @@ do { \ __preempt_schedule(); \ } while (0) +#define preempt_enable_notrace() \ +do { \ + barrier(); \ + if (unlikely(__preempt_count_dec_and_test())) \ + __preempt_schedule_notrace(); \ +} while (0) + #define preempt_check_resched() \ do { \ if (should_resched()) \ __preempt_schedule(); \ } while (0) -#else +#else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ barrier(); \ preempt_count_dec(); \ } while (0) -#define preempt_check_resched() do { } while (0) -#endif - -#define preempt_disable_notrace() \ -do { \ - __preempt_count_inc(); \ - barrier(); \ -} while (0) -#define preempt_enable_no_resched_notrace() \ +#define preempt_enable_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) -#ifdef CONFIG_PREEMPT +#define preempt_check_resched() do { } while (0) +#endif /* CONFIG_PREEMPT */ -#define preempt_enable_notrace() \ +#define preempt_disable_notrace() \ do { \ + __preempt_count_inc(); \ barrier(); \ - if (unlikely(__preempt_count_dec_and_test())) \ - __preempt_schedule_notrace(); \ } while (0) -#else -#define preempt_enable_notrace() \ + +#define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) -#endif #else /* !CONFIG_PREEMPT_COUNT */ -- cgit v1.2.3 From 21509084f999d7accd32e45961ef76853112e978 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 6 May 2015 15:33:49 -0400 Subject: perf/x86/intel: Handle multiple records in the PEBS buffer When the PEBS interrupt threshold is larger than one record and the machine supports multiple PEBS events, the records of these events are mixed up and we need to demultiplex them. Demuxing the records is hard because the hardware is deficient. The hardware has two issues that, when combined, create impossible scenarios to demux. The first issue is that the 'status' field of the PEBS record is a copy of the GLOBAL_STATUS MSR at PEBS assist time. To see why this is a problem let us first describe the regular PEBS cycle: A) the CTRn value reaches 0: - the corresponding bit in GLOBAL_STATUS gets set - we start arming the hardware assist < some unspecified amount of time later -- this could cover multiple events of interest > B) the hardware assist is armed, any next event will trigger it C) a matching event happens: - the hardware assist triggers and generates a PEBS record this includes a copy of GLOBAL_STATUS at this moment - if we auto-reload we (re)set CTRn - we clear the relevant bit in GLOBAL_STATUS Now consider the following chain of events: A0, B0, A1, C0 The event generated for counter 0 will include a status with counter 1 set, even though its not at all related to the record. A similar thing can happen with a !PEBS event if it just happens to overflow at the right moment. The second issue is that the hardware will only emit one record for two or more counters if the event that triggers the assist is 'close'. The 'close' can be several cycles. In some cases even the complete assist, if the event is something that doesn't need retirement. For instance, consider this chain of events: A0, B0, A1, B1, C01 Where C01 is an event that triggers both hardware assists, we will generate but a single record, but again with both counters listed in the status field. This time the record pertains to both events. Note that these two cases are different but undistinguishable with the data as generated. Therefore demuxing records with multiple PEBS bits (we can safely ignore status bits for !PEBS counters) is impossible. Furthermore we cannot emit the record to both events because that might cause a data leak -- the events might not have the same privileges -- so what this patch does is discard such events. The assumption/hope is that such discards will be rare. Here lists some possible ways you may get high discard rate. - when you count the same thing multiple times. But it is not a useful configuration. - you can be unfortunate if you measure with a userspace only PEBS event along with either a kernel or unrestricted PEBS event. Imagine the event triggering and setting the overflow flag right before entering the kernel. Then all kernel side events will end up with multiple bits set. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang [ Changelog improvements. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1430940834-8964-4-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 06580028cee6..5f192e1bc98e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -730,6 +730,19 @@ extern int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +extern void perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); + +extern void +perf_event_header__init_id(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void +perf_event__output_id_sample(struct perf_event *event, + struct perf_output_handle *handle, + struct perf_sample_data *sample); + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; -- cgit v1.2.3 From f38b0dbb491a6987e198aa6b428db8692a6480f8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 10 May 2015 15:13:14 -0400 Subject: perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES After enlarging the PEBS interrupt threshold, there may be some mixed up PEBS samples which are discarded by the kernel. This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with the number of possible discarded records when it is impossible to demux the samples. It makes sure the user is not left in the dark about such discards. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285195-14269-8-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5f192e1bc98e..a204d5266f5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event, struct perf_output_handle *handle, struct perf_sample_data *sample); +extern void +perf_log_lost_samples(struct perf_event *event, u64 lost); + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; -- cgit v1.2.3 From 7cf7fa529d0b6b514949cc67b39e3ce406c37006 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 7 Jun 2015 15:44:23 +0300 Subject: net/mlx5_core: Fix static checker warnings around system guid query flow Fix static checker warnings in the flow of system guid query. Fixes: 707c4602cda6 ('net/mlx5_core: Add new query HCA vport commands') Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 67882a834efb..967e0fd06e89 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -48,7 +48,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, u16 vf_num, struct mlx5_hca_vport_context *rep); int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, - __be64 *sys_image_guid); + u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); -- cgit v1.2.3 From cb4a316752709be4a644f070440a8be470d92b7d Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 6 Jun 2015 10:02:14 +1000 Subject: cgroup: use bitmask to filter for_each_subsys Add a new macro for_each_subsys_which that allows all enabled cgroup subsystems to be filtered by a bitmask, such that mask & (1 << ssid) determines if the subsystem is to be processed in the loop body (where ssid is the unique id of the subsystem). Also replace the need_forkexit_callback with two separate bitmasks for each callback to make (ss->{fork,exit}) checks unnecessary. tj: add a short comment for "if (!CGROUP_SUBSYS_COUNT)". Signed-off-by: Aleksa Sarai --- include/linux/cgroup-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 26d1cea7929f..c5588c438448 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -490,6 +490,8 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ +#define CGROUP_SUBSYS_COUNT 0 + static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} -- cgit v1.2.3 From 3846c15820a1841225d0245afda4875af23dfbbe Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Fri, 5 Jun 2015 15:14:54 -0400 Subject: efi: Work around ia64 build problem with ESRT driver So, I'm told this problem exists in the world: > Subject: Build error in -next due to 'efi: Add esrt support' > > Building ia64:defconfig ... failed > -------------- > Error log: > > drivers/firmware/efi/esrt.c:28:31: fatal error: asm/early_ioremap.h: No such file or directory > I'm not really sure how it's okay that we have things in asm-generic on some platforms but not others - is having it the same everywhere not the whole point of asm-generic? That said, ia64 doesn't have early_ioremap.h . So instead, since it's difficult to imagine new IA64 machines with UEFI 2.5, just don't build this code there. To me this looks like a workaround - doing something like: generic-y += early_ioremap.h in arch/ia64/include/asm/Kbuild would appear to be more correct, but ia64 has its own early_memremap() decl in arch/ia64/include/asm/io.h , and it's a macro. So adding the above /and/ requiring that asm/io.h be included /after/ asm/early_ioremap.h in all cases would fix it, but that's pretty ugly as well. Since I'm not going to spend the rest of my life rectifying ia64 headers vs "generic" headers that aren't generic, it's much simpler to just not build there. Note that I've only actually tried to build this patch on x86_64, but esrt.o still gets built there, and that would seem to demonstrate that the conditional building is working correctly at all the places the code built before. I no longer have any ia64 machines handy to test that the exclusion actually works there. Signed-off-by: Peter Jones Acked-by: Tony Luck Reviewed-by: Guenter Roeck (Compile-)Tested-by: Guenter Roeck Signed-off-by: Matt Fleming --- include/linux/efi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 024c27e7c0fa..2092965afca3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -879,7 +879,11 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +#ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); +#else +static inline void efi_esrt_init(void) { } +#endif extern int efi_config_parse_tables(void *config_tables, int count, int sz, efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); -- cgit v1.2.3 From a8077d6573530a91d5674a28cdedbed39c391ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 7 Jun 2015 13:15:30 +0200 Subject: bcma: make calls to PCI hostmode functions config-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 5ba6918ca20b..9657f11d48a7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -246,7 +246,18 @@ static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) } #endif +#ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); +#else +static inline int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev) +{ + return -ENOTSUPP; +} +static inline int bcma_core_pci_plat_dev_init(struct pci_dev *dev) +{ + return -ENOTSUPP; +} +#endif #endif /* LINUX_BCMA_DRIVER_PCI_H_ */ -- cgit v1.2.3 From 01d72a95188880b22190e937ed8718ed4b45bdce Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 4 Jun 2015 16:38:17 -0500 Subject: PCI: Remove unused pci_dma_burst_advice() pci_dma_burst_advice() was added by e24c2d963a60 ("[PATCH] PCI: DMA bursting advice") but apparently never used. Remove it. Signed-off-by: Bjorn Helgaas Acked-by: Michal Simek # microblaze CC: David S. Miller --- include/linux/pci.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..08fb4e307d68 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1197,15 +1197,6 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle) #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr) -enum pci_dma_burst_strategy { - PCI_DMA_BURST_INFINITY, /* make bursts as large as possible, - strategy_parameter is N/A */ - PCI_DMA_BURST_BOUNDARY, /* disconnect at every strategy_parameter - byte boundaries */ - PCI_DMA_BURST_MULTIPLE, /* disconnect at some multiple of - strategy_parameter byte boundaries */ -}; - struct msix_entry { u32 vector; /* kernel uses to write allocated vector */ u16 entry; /* driver uses to specify entry, OS writes */ @@ -1430,8 +1421,6 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } -#define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0) - static inline void pci_block_cfg_access(struct pci_dev *dev) { } static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev) { return 0; } -- cgit v1.2.3 From d711b8b30c803b1b2aedf6a3474758798078f9e1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 6 Jun 2015 11:30:00 +0200 Subject: hrtimers: Make sure hrtimer_resolution is unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... in the !CONFIG_HIGH_RES_TIMERS case too. And thus fix warnings like this one: net/sched/sch_api.c: In function ‘psched_show’: net/sched/sch_api.c:1891:6: warning: format ‘%x’ expects argument of type ‘unsigned int’, but argument 6 has type ‘long int’ [-Wformat=] (u32)NSEC_PER_SEC / hrtimer_resolution); Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1433583000-32090-1-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner Cc: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 470d876c2eda..3f82a7edc03d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -309,7 +309,7 @@ extern unsigned int hrtimer_resolution; # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -#define hrtimer_resolution LOW_RES_NSEC +#define hrtimer_resolution (unsigned int)LOW_RES_NSEC static inline void hrtimer_peek_ahead_timers(void) { } -- cgit v1.2.3 From 2c1296d92ac0367364bcb73a43c12a0bdfbfee75 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2015 18:41:32 +0200 Subject: iommu: Add iommu_get_domain_for_dev function This function can be used to request the current domain a device is attached to. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0546b8710ce3..683a1c4b15e7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -193,6 +193,7 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); +extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -332,6 +333,11 @@ static inline void iommu_detach_device(struct iommu_domain *domain, { } +static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) +{ + return NULL; +} + static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, int gfp_order, int prot) { -- cgit v1.2.3 From a1015c2b99b94cf521603b41debf167114031456 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2015 18:41:33 +0200 Subject: iommu: Introduce direct mapped region handling Add two new functions to the IOMMU-API to allow the IOMMU drivers to export the requirements for direct mapped regions per device. This is useful for exporting the information in Intel VT-d's RMRR entries or AMD-Vi's unity mappings. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 683a1c4b15e7..689499904166 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -114,6 +114,20 @@ enum iommu_attr { DOMAIN_ATTR_MAX, }; +/** + * struct iommu_dm_region - descriptor for a direct mapped memory region + * @list: Linked list pointers + * @start: System physical start address of the region + * @length: Length of the region in bytes + * @prot: IOMMU Protection flags (READ/WRITE/...) + */ +struct iommu_dm_region { + struct list_head list; + phys_addr_t start; + size_t length; + int prot; +}; + #ifdef CONFIG_IOMMU_API /** @@ -159,6 +173,10 @@ struct iommu_ops { int (*domain_set_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); + /* Request/Free a list of direct mapping requirements for a device */ + void (*get_dm_regions)(struct device *dev, struct list_head *list); + void (*put_dm_regions)(struct device *dev, struct list_head *list); + /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot); @@ -205,6 +223,9 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); +extern void iommu_get_dm_regions(struct device *dev, struct list_head *list); +extern void iommu_put_dm_regions(struct device *dev, struct list_head *list); + extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); extern void iommu_detach_group(struct iommu_domain *domain, @@ -379,6 +400,16 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } +static inline void iommu_get_dm_regions(struct device *dev, + struct list_head *list) +{ +} + +static inline void iommu_put_dm_regions(struct device *dev, + struct list_head *list) +{ +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { -- cgit v1.2.3 From 6827ca83695d5e41ad31b0719788ee65f00ca4b3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2015 18:41:35 +0200 Subject: iommu: Add function to query the default domain of a group This will be used to handle unity mappings in the iommu drivers. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 689499904166..b944b2be4fa2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -249,6 +249,7 @@ extern int iommu_group_unregister_notifier(struct iommu_group *group, struct notifier_block *nb); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); +extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); -- cgit v1.2.3 From bd00c606a6f60ca015a62bdbf671eadd48a4ca82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jun 2015 15:06:55 +0100 Subject: iommu/vt-d: Change PASID support to bit 40 of Extended Capability Register The existing hardware implementations with PASID support advertised in bit 28? Forget them. They do not exist. Bit 28 means nothing. When we have something that works, it'll use bit 40. Do not attempt to infer anything meaningful from bit 28. This will be reflected in an updated VT-d spec in the extremely near future. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 796ef9645827..a240e61a7700 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,13 +115,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) #define ecap_nwfs(e) ((e >> 33) & 0x1) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_pasid(e) ((e >> 28) & 0x1) +/* PASID support used to be on bit 28 */ #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v1.2.3 From b1999477ed91c3c33891acfe0e18a4457e5c4915 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Tue, 14 Apr 2015 11:55:55 +0000 Subject: ARM: prima2: move to use REGMAP APIs for rtciobrg all devices behind rtciobrg needs a special way to access. currently they are using a platform-specific API. this patch moves to REGMAP, then clients can use regmap APIs to read/write. for the moment, old APIs are still kept, once all clients move to regmap, old APIs will be dropped. this patch also does minor clean for comments, authors statement. Signed-off-by: Guo Zeng Signed-off-by: Barry Song --- include/linux/rtc/sirfsoc_rtciobrg.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h index 2c92e1c8e055..aefd997262e4 100644 --- a/include/linux/rtc/sirfsoc_rtciobrg.h +++ b/include/linux/rtc/sirfsoc_rtciobrg.h @@ -9,10 +9,14 @@ #ifndef _SIRFSOC_RTC_IOBRG_H_ #define _SIRFSOC_RTC_IOBRG_H_ +struct regmap_config; + extern void sirfsoc_rtc_iobrg_besyncing(void); extern u32 sirfsoc_rtc_iobrg_readl(u32 addr); extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr); +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config); #endif -- cgit v1.2.3 From ae60d6a0e3a9197d37f8c8c4584a8ecd18518cd6 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 28 May 2015 19:09:55 +0200 Subject: time: Refactor usecs_to_jiffies Refactor the usecs_to_jiffies conditional code part in time.c and jiffies.h putting it into conditional functions rather than #ifdefs to improve readability. This is analogous to the msecs_to_jiffies() cleanup in commit ca42aaf0c861 ("time: Refactor msecs_to_jiffies") Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1432832996-12129-1-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 3bde5eb8568b..a316ebea0a89 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -362,7 +362,32 @@ static inline unsigned long msecs_to_jiffies(const unsigned int m) } } -extern unsigned long usecs_to_jiffies(const unsigned int u); +extern unsigned long __usecs_to_jiffies(const unsigned int u); +#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); +} +#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return u * (HZ / USEC_PER_SEC); +} +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ +#else +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + >> USEC_TO_HZ_SHR32; +} +#endif + +static inline unsigned long usecs_to_jiffies(const unsigned int u) +{ + return __usecs_to_jiffies(u); +} + extern unsigned long timespec_to_jiffies(const struct timespec *value); extern void jiffies_to_timespec(const unsigned long jiffies, struct timespec *value); -- cgit v1.2.3 From c569a23d65ac2900d9998d3fe04044fe95be6b2f Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Thu, 28 May 2015 19:09:56 +0200 Subject: time: Allow gcc to fold usecs_to_jiffies(constant) To allow constant folding in usecs_to_jiffies() conditionally calls the HZ dependent _usecs_to_jiffies() helpers or, when gcc can not figure out constant folding, __usecs_to_jiffies, which is the renamed original usecs_to_jiffies() function. Signed-off-by: Nicholas Mc Guire Cc: Masahiro Yamada Cc: Sam Ravnborg Cc: Joe Perches Cc: John Stultz Cc: Andrew Hunter Cc: Paul Turner Cc: Michal Marek Link: http://lkml.kernel.org/r/1432832996-12129-2-git-send-email-hofrat@osadl.org Signed-off-by: Thomas Gleixner --- include/linux/jiffies.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index a316ebea0a89..535fd3bb1ba8 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -383,9 +383,37 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u) } #endif +/** + * usecs_to_jiffies: - convert microseconds to jiffies + * @u: time in microseconds + * + * conversion is done as follows: + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows as for msecs_to_jiffies. + * + * usecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __usecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _usecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. + */ static inline unsigned long usecs_to_jiffies(const unsigned int u) { - return __usecs_to_jiffies(u); + if (__builtin_constant_p(u)) { + if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return _usecs_to_jiffies(u); + } else { + return __usecs_to_jiffies(u); + } } extern unsigned long timespec_to_jiffies(const struct timespec *value); -- cgit v1.2.3 From ed06aeefdac348cfb91a3db5fe1067e3202afd70 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 9 Jun 2015 22:26:05 +0200 Subject: nfc: st-nci: Rename st21nfcb to st-nci STMicroelectronics NFC NCI chips family is extending with the new ST21NFCC using the AMS AS39230 RF booster. The st21nfcb driver is relevant for this solution and might be with future products. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st-nci.h | 29 +++++++++++++++++++++++++++++ include/linux/platform_data/st21nfcb.h | 29 ----------------------------- include/linux/platform_data/st_nci.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 29 deletions(-) create mode 100644 include/linux/platform_data/st-nci.h delete mode 100644 include/linux/platform_data/st21nfcb.h create mode 100644 include/linux/platform_data/st_nci.h (limited to 'include/linux') diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st-nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h deleted file mode 100644 index b023373d9874..000000000000 --- a/include/linux/platform_data/st21nfcb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Driver include for the ST21NFCB NFC chip. - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _ST21NFCB_NCI_H_ -#define _ST21NFCB_NCI_H_ - -#define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" - -struct st21nfcb_nfc_platform_data { - unsigned int gpio_reset; - unsigned int irq_polarity; -}; - -#endif /* _ST21NFCB_NCI_H_ */ diff --git a/include/linux/platform_data/st_nci.h b/include/linux/platform_data/st_nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st_nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From 205a525c334295e3cd4cc7755fd2c0398e3a787f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Jun 2015 18:19:39 +0800 Subject: random: Add callback API for random pool readiness The get_blocking_random_bytes API is broken because the wait can be arbitrarily long (potentially forever) so there is no safe way of calling it from within the kernel. This patch replaces it with a callback API instead. The callback is invoked potentially from interrupt context so the user needs to schedule their own work thread if necessary. In addition to adding callbacks, they can also be removed as otherwise this opens up a way for user-space to allocate kernel memory with no bound (by opening algif_rng descriptors and then closing them). Signed-off-by: Herbert Xu --- include/linux/random.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 796267d56901..30e2aca0b16a 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -6,8 +6,15 @@ #ifndef _LINUX_RANDOM_H #define _LINUX_RANDOM_H +#include #include +struct random_ready_callback { + struct list_head list; + void (*func)(struct random_ready_callback *rdy); + struct module *owner; +}; + extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); @@ -15,6 +22,8 @@ extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); extern void get_blocking_random_bytes(void *buf, int nbytes); +extern int add_random_ready_callback(struct random_ready_callback *rdy); +extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); extern int random_int_secret_init(void); -- cgit v1.2.3 From c2719503f5e1e6213d716bb078bdad01e28ebcbf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Jun 2015 18:19:42 +0800 Subject: random: Remove kernel blocking API This patch removes the kernel blocking API as it has been completely replaced by the callback API. Signed-off-by: Herbert Xu --- include/linux/random.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 30e2aca0b16a..e651874df2c9 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -21,7 +21,6 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); -extern void get_blocking_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -- cgit v1.2.3 From 0bb979472a7401022109e81dd89d777adea58710 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jun 2015 08:01:20 -0600 Subject: cfq-iosched: fix the setting of IOPS mode on SSDs A previous commit wanted to make CFQ default to IOPS mode on non-rotational storage, however it did so when the queue was initialized and the non-rotational flag is only set later on in the probe. Add an elevator hook that gets called off the add_disk() path, at that point we know that feature probing has finished, and we can reliably check for the various flags that drivers can set. Fixes: 41c0126b ("block: Make CFQ default to IOPS mode on SSDs") Tested-by: Romain Francoise Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 45a91474487d..638b324f0291 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques typedef int (elevator_init_fn) (struct request_queue *, struct elevator_type *e); typedef void (elevator_exit_fn) (struct elevator_queue *); +typedef void (elevator_registered_fn) (struct request_queue *); struct elevator_ops { @@ -68,6 +69,7 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; + elevator_registered_fn *elevator_registered_fn; }; #define ELV_NAME_MAX (16) -- cgit v1.2.3 From 5c6e3a97e969e978368df83239583771c936efea Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 8 Jun 2015 10:09:48 +0900 Subject: power_supply: sysfs: Bring back write to writeable properties The fix for NULL pointer exception related to calling uevent for not finished probe caused to set all writeable properties as non-writeable. This was caused by checking if property is writeable before the initial increase of power supply usage counter and in the same time using wrapper over property_is_writeable(). The wrapper returns ENODEV if the usage counter is still 0. The call trace looked like: device probe: power_supply_register() use_cnt = 0; device_add() create sysfs entries power_supply_attr_is_visible() power_supply_property_is_writeable() if (use_cnt == 0) return -ENODEV; use_cnt++; Replace the usage of wrapper with direct call to property_is_writeable() from driver. This should be safe call during device probe because implementations of this callback just return 0/1 for different properties and they do not access any of the driver's internal data. Fixes: 8e59c7f23410 ("power_supply: Fix NULL pointer dereference during bq27x00_battery probe") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a80f1fd01ddb..0395bcb18ddb 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -206,6 +206,11 @@ struct power_supply_desc { int (*set_property)(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val); + /* + * property_is_writeable() will be called during registration + * of power supply. If this happens during device probe then it must + * not access internal data of device (because probe did not end). + */ int (*property_is_writeable)(struct power_supply *psy, enum power_supply_property psp); void (*external_power_changed)(struct power_supply *psy); -- cgit v1.2.3 From fe27e1dfe9962b07215ee01445926306ddbb7c25 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 9 Jun 2015 23:37:56 +0200 Subject: power: Add devm_power_supply_get_by_phandle() helper function This commit adds a resource-managed version of the power_supply_get_by_phandle() function. Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0395bcb18ddb..ef9f1592185d 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -292,10 +292,15 @@ extern void power_supply_put(struct power_supply *psy); #ifdef CONFIG_OF extern struct power_supply *power_supply_get_by_phandle(struct device_node *np, const char *property); +extern struct power_supply *devm_power_supply_get_by_phandle( + struct device *dev, const char *property); #else /* !CONFIG_OF */ static inline struct power_supply * power_supply_get_by_phandle(struct device_node *np, const char *property) { return NULL; } +static inline struct power_supply * +devm_power_supply_get_by_phandle(struct device *dev, const char *property) +{ return NULL; } #endif /* CONFIG_OF */ extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); -- cgit v1.2.3 From 3037e9ea780027d41baaaabb68a749e49e7c8260 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 10 Jun 2015 21:04:54 +0100 Subject: clk: fixed: Add comment to clk_fixed_set_rate Currently it is not made explicit why clk_fixed_set_rate() can ignore its arguments and unconditionally return success. Add a comment to explain this. We also mark the clk_ops table const since it should never be modified at runtime. Suggested-by: Stephen Boyd Signed-off-by: Daniel Thompson Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2e5df069ca34..4a943d13625b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -459,7 +459,7 @@ struct clk_fixed_factor { unsigned int div; }; -extern struct clk_ops clk_fixed_factor_ops; +extern const struct clk_ops clk_fixed_factor_ops; struct clk *clk_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); -- cgit v1.2.3 From b064a8fa77dfead647564c46ac8fc5b13bd1ab73 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jun 2015 01:33:36 +0200 Subject: ACPI / init: Switch over platform to the ACPI mode later Commit 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" moved the ACPI subsystem initialization, including the ACPI mode enabling, to an earlier point in the initialization sequence, to allow the timekeeping subsystem use ACPI early. Unfortunately, that resulted in boot regressions on some systems and the early ACPI initialization was moved toward its original position in the kernel initialization code by commit c4e1acbb35e4 "ACPI / init: Invoke early ACPI initialization later". However, that turns out to be insufficient, as boot is still broken on the Tyan S8812 mainboard. To fix that issue, split the ACPI early initialization code into two pieces so the majority of it still located in acpi_early_init() and the part switching over the platform into the ACPI mode goes into a new function, acpi_subsystem_init(), executed at the original early ACPI initialization spot. That fixes the Tyan S8812 boot problem, but still allows ACPI tables to be loaded earlier which is useful to the EFI code in efi_enter_virtual_mode(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=97141 Fixes: 73f7d1ca3263 "ACPI / init: Run acpi_early_init() before timekeeping_init()" Reported-and-tested-by: Marius Tolzmann Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Reviewed-by: Hanjun Guo Reviewed-by: Lee, Chun-Yi --- include/linux/acpi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..4550be3bb63b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -440,6 +440,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 extern void acpi_early_init(void); +extern void acpi_subsystem_init(void); extern int acpi_nvs_register(__u64 start, __u64 size); @@ -494,6 +495,7 @@ static inline const char *acpi_dev_name(struct acpi_device *adev) } static inline void acpi_early_init(void) { } +static inline void acpi_subsystem_init(void) { } static inline int early_acpi_boot_init(void) { -- cgit v1.2.3 From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2aa68976a482..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -268,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ -- cgit v1.2.3 From 8e2281330f9930bccf77cf04027ec60b6cc0fb34 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:26 -0400 Subject: sunrpc: make xprt->swapper an atomic_t Split xs_swapper into enable/disable functions and eliminate the "enable" flag. Currently, it's racy if you have multiple swapon/swapoff operations running in parallel over the same xprt. Also fix it so that we only set it to a memalloc socket on a 0->1 transition and only clear it on a 1->0 transition. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc9c39d6d30d..9a3308703c15 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -180,7 +180,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -346,7 +346,8 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); +int xs_swapper_enable(struct rpc_xprt *xprt); +void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); -- cgit v1.2.3 From d67fa4d85a2143b46052b2e9ccc6749a4c97b2de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:29 -0400 Subject: sunrpc: turn swapper_enable/disable functions into rpc_xprt_ops RDMA xprts don't have a sock_xprt, but an rdma_xprt, so the xs_swapper_enable/disable functions will likely oops when fed an RDMA xprt. Turn these functions into rpc_xprt_ops so that that doesn't occur. For now the RDMA versions are no-ops that just return -EINVAL on an attempt to swapon. Cc: Chuck Lever Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9a3308703c15..b6096592b1d4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,8 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); }; /* @@ -328,6 +330,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -346,8 +360,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper_enable(struct rpc_xprt *xprt); -void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); -- cgit v1.2.3 From 11598b8ff2b97cf034d0c025cf125c92b574bafc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 10 Jun 2015 16:54:28 -0400 Subject: NFS: Remove unused nfs_rw_ops->rw_release() function This was only ever set to nfs_writeback_release_common(), a function which is completely empty. Let's just drop this function pointer and simplify the code a bit. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); -- cgit v1.2.3 From 4a06825839889cc1756d0dd8a52d6b1071ee0263 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:02:25 -0400 Subject: SUNRPC: Transport fault injection It has been exceptionally useful to exercise the logic that handles local immediate errors and RDMA connection loss. To enable developers to test this regularly and repeatably, add logic to simulate connection loss every so often. Fault injection is disabled by default. It is enabled with $ sudo echo xxx > /sys/kernel/debug/sunrpc/inject_fault/disconnect where "xxx" is a large positive number of transport method calls before a disconnect. A value of several thousand is usually a good number that allows reasonable forward progress while still causing a lot of connection drops. These hooks are disabled when SUNRPC_DEBUG is turned off. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b6096592b1d4..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -135,6 +135,7 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -244,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -445,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ -- cgit v1.2.3 From 4f822c625f9c68267ffee7519519e304858a46c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:57 -0700 Subject: net: phy: broadcom: include phy.h for brcmphy.h We utilize inline functions from the PHY library, make sure that we do include phy.h in brcmphy.h in order for the code including brcmphy.h not to have to resolve this inclusion dependency. Fixes: 705314797b8b ("net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 656da2a12ffe..abb6106f839d 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -1,6 +1,8 @@ #ifndef _LINUX_BRCMPHY_H #define _LINUX_BRCMPHY_H +#include + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From 8bc84b79265f66d5af736473db582e74b28e099d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:58 -0700 Subject: net: phy: broadcom: define Broadcom pseudo-PHY address in brcmphy.h Define the pseudo-PHY address (30) which is used by all Broadcom Ethernet switches in a shared header file. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index abb6106f839d..697ca7795bd9 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -3,6 +3,11 @@ #include +/* All Broadcom Ethernet switches have a pseudo-PHY at address 30 which is used + * to configure the switch internal registers via MDIO accesses. + */ +#define BRCM_PSEUDO_PHY_ADDR 30 + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From d290f1e70d85a9a4d124594c6a3d769329960bdc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2015 18:41:36 +0200 Subject: iommu: Introduce iommu_request_dm_for_dev() This function can be called by an IOMMU driver to request that a device's default domain is direct mapped. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b944b2be4fa2..dc767f7c3704 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -225,6 +225,7 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_dm_regions(struct device *dev, struct list_head *list); extern void iommu_put_dm_regions(struct device *dev, struct list_head *list); +extern int iommu_request_dm_for_dev(struct device *dev); extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); @@ -411,6 +412,11 @@ static inline void iommu_put_dm_regions(struct device *dev, { } +static inline int iommu_request_dm_for_dev(struct device *dev) +{ + return -ENODEV; +} + static inline int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { -- cgit v1.2.3 From dfabde206aa10ae71a89ba75e68b1f58a6336a05 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 11 May 2015 17:08:50 +0100 Subject: mailbox: Add ability for clients to request channels by name This patch supplies a new framework API; mbox_request_channel_byname(). It works by supplying the usual client pointer as the first argument and a string as the second. The API will search the client's node for a 'mbox-names' property then request a channel in the normal way using the requested string's index as the expected second 'index' argument. Signed-off-by: Lee Jones Signed-off-by: Jassi Brar --- include/linux/mailbox_client.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 1726ccbd8009..44348710953f 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -40,6 +40,8 @@ struct mbox_client { void (*tx_done)(struct mbox_client *cl, void *mssg, int r); }; +struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, + const char *name); struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index); int mbox_send_message(struct mbox_chan *chan, void *mssg); void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */ -- cgit v1.2.3 From dc14bdef8762a8098b1da881b611d722e24fe787 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:19 +0200 Subject: NFC: nfcmrvl: add platform_data and DT configuration Declare nfcmrvl platform_data structure and few DT parameters for nfcmrvl driver. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nfcmrvl.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/platform_data/nfcmrvl.h (limited to 'include/linux') diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h new file mode 100644 index 000000000000..106cfe5ed589 --- /dev/null +++ b/include/linux/platform_data/nfcmrvl.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +#ifndef _NFCMRVL_PTF_H_ +#define _NFCMRVL_PTF_H_ + +struct nfcmrvl_platform_data { + /* + * Generic + */ + + /* GPIO that is wired to RESET_N signal */ + unsigned int reset_n_io; + /* Tell if transport is muxed in HCI one */ + unsigned int hci_muxed; +}; + +#endif /* _NFCMRVL_PTF_H_ */ -- cgit v1.2.3 From e097dc624f784debbde49701a493bf920bc422c7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:20 +0200 Subject: NFC: nfcmrvl: add UART driver Add support of Marvell NFC chip controlled over UART Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nfcmrvl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index 106cfe5ed589..ac91707dabcb 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -26,6 +26,15 @@ struct nfcmrvl_platform_data { unsigned int reset_n_io; /* Tell if transport is muxed in HCI one */ unsigned int hci_muxed; + + /* + * UART specific + */ + + /* Tell if UART needs flow control at init */ + unsigned int flow_control; + /* Tell if firmware supports break control for power management */ + unsigned int break_control; }; #endif /* _NFCMRVL_PTF_H_ */ -- cgit v1.2.3 From facc9699f0fe7d65a92cc09e175662659306066d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:27 +0300 Subject: net/mlx5e: Fix HW MTU settings Previously we configured HW MTU to be netdev->mtu, actually we need to configure netdev->mtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN). Also, query MTU can not fail, hence make the relevant helper a void functionm, add mlx5e_set_dev_port_mtu, helper function to handle MTU setting. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6093bde16b94..c0930f8d7021 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -756,11 +756,11 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, - u8 local_port); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 local_port); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port); + int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -- cgit v1.2.3 From fc11fbf9a785b25c5d07f05a30d4169ec39818da Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:28 +0300 Subject: net/mlx5e: Add HW cacheline start padding Enable HW cacheline start padding and align RX WQE size to cacheline while considering HW start padding. Also, fix dma_unmap call to use the correct SKB data buffer size. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b2c43508a737..b943cd9e2097 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -131,6 +131,10 @@ enum { MLX5_INLINE_SEG = 0x80000000, }; +enum { + MLX5_HW_START_PADDING = MLX5_INLINE_SEG, +}; + enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, -- cgit v1.2.3 From 833f32d763028c1bb371c64f457788b933773b3e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 11 Jun 2015 15:54:55 -0700 Subject: time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge Currently, leapsecond adjustments are done at tick time. As a result, the leapsecond was applied at the first timer tick *after* the leapsecond (~1-10ms late depending on HZ), rather then exactly on the second edge. This was in part historical from back when we were always tick based, but correcting this since has been avoided since it adds extra conditional checks in the gettime fastpath, which has performance overhead. However, it was recently pointed out that ABS_TIME CLOCK_REALTIME timers set for right after the leapsecond could fire a second early, since some timers may be expired before we trigger the timekeeping timer, which then applies the leapsecond. This isn't quite as bad as it sounds, since behaviorally it is similar to what is possible w/ ntpd made leapsecond adjustments done w/o using the kernel discipline. Where due to latencies, timers may fire just prior to the settimeofday call. (Also, one should note that all applications using CLOCK_REALTIME timers should always be careful, since they are prone to quirks from settimeofday() disturbances.) However, the purpose of having the kernel do the leap adjustment is to avoid such latencies, so I think this is worth fixing. So in order to properly keep those timers from firing a second early, this patch modifies the ntp and timekeeping logic so that we keep enough state so that the update_base_offsets_now accessor, which provides the hrtimer core the current time, can check and apply the leapsecond adjustment on the second edge. This prevents the hrtimer core from expiring timers too early. This patch does not modify any other time read path, so no additional overhead is incurred. However, this also means that the leap-second continues to be applied at tick time for all other read-paths. Apologies to Richard Cochran, who pushed for similar changes years ago, which I resisted due to the concerns about the performance overhead. While I suspect this isn't extremely critical, folks who care about strict leap-second correctness will likely want to watch this. Potentially a -stable candidate eventually. Originally-suggested-by: Richard Cochran Reported-by: Daniel Bristot de Oliveira Reported-by: Prarit Bhargava Signed-off-by: John Stultz Cc: Richard Cochran Cc: Jan Kara Cc: Jiri Bohac Cc: Shuah Khan Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1434063297-28657-4-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/time64.h | 1 + include/linux/timekeeper_internal.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time64.h b/include/linux/time64.h index 12d4e82b0276..77b5df2acd2a 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -29,6 +29,7 @@ struct timespec64 { #define FSEC_PER_SEC 1000000000000000LL /* Located here for timespec[64]_valid_strict */ +#define TIME64_MAX ((s64)~((u64)1 << 63)) #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e1f5a1136554..25247220b4b7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,6 +50,7 @@ struct tk_read_base { * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -90,6 +91,7 @@ struct timekeeper { ktime_t offs_tai; s32 tai_offset; unsigned int clock_was_set_seq; + ktime_t next_leap_ktime; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ -- cgit v1.2.3 From 3bf17472226b0041b0c61363bd57a5cadbe620c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Jun 2015 13:20:29 +0800 Subject: iommu: dmar: Extend struct irte for VT-d Posted-Interrupts The IRTE (Interrupt Remapping Table Entry) is either an entry for remapped or for posted interrupts. The hardware distiguishes between remapped and posted entries by bit 15 in the low 64 bit of the IRTE. If cleared the entry is remapped, if set it's posted. The entries have common fields and dependent on the posted bit fields with different meanings. Extend struct irte to handle the differences between remap and posted mode by having three structs in the unions: - Shared - Remapped - Posted Signed-off-by: Thomas Gleixner Signed-off-by: Feng Wu Acked-by: Joerg Roedel Cc: jiang.liu@linux.intel.com Cc: iommu@lists.linux-foundation.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-3-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- include/linux/dmar.h | 70 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 84737565c1fd..0dbcabcb5f0d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -185,33 +185,73 @@ static inline int dmar_device_remove(void *handle) struct irte { union { + /* Shared between remapped and posted mode*/ struct { - __u64 present : 1, - fpd : 1, - dst_mode : 1, - redir_hint : 1, - trigger_mode : 1, - dlvry_mode : 3, - avail : 4, - __reserved_1 : 4, - vector : 8, - __reserved_2 : 8, - dest_id : 32; + __u64 present : 1, /* 0 */ + fpd : 1, /* 1 */ + __res0 : 6, /* 2 - 6 */ + avail : 4, /* 8 - 11 */ + __res1 : 3, /* 12 - 14 */ + pst : 1, /* 15 */ + vector : 8, /* 16 - 23 */ + __res2 : 40; /* 24 - 63 */ + }; + + /* Remapped mode */ + struct { + __u64 r_present : 1, /* 0 */ + r_fpd : 1, /* 1 */ + dst_mode : 1, /* 2 */ + redir_hint : 1, /* 3 */ + trigger_mode : 1, /* 4 */ + dlvry_mode : 3, /* 5 - 7 */ + r_avail : 4, /* 8 - 11 */ + r_res0 : 4, /* 12 - 15 */ + r_vector : 8, /* 16 - 23 */ + r_res1 : 8, /* 24 - 31 */ + dest_id : 32; /* 32 - 63 */ + }; + + /* Posted mode */ + struct { + __u64 p_present : 1, /* 0 */ + p_fpd : 1, /* 1 */ + p_res0 : 6, /* 2 - 7 */ + p_avail : 4, /* 8 - 11 */ + p_res1 : 2, /* 12 - 13 */ + p_urgent : 1, /* 14 */ + p_pst : 1, /* 15 */ + p_vector : 8, /* 16 - 23 */ + p_res2 : 14, /* 24 - 37 */ + pda_l : 26; /* 38 - 63 */ }; __u64 low; }; union { + /* Shared between remapped and posted mode*/ struct { - __u64 sid : 16, - sq : 2, - svt : 2, - __reserved_3 : 44; + __u64 sid : 16, /* 64 - 79 */ + sq : 2, /* 80 - 81 */ + svt : 2, /* 82 - 83 */ + __res3 : 44; /* 84 - 127 */ + }; + + /* Posted mode*/ + struct { + __u64 p_sid : 16, /* 64 - 79 */ + p_sq : 2, /* 80 - 81 */ + p_svt : 2, /* 82 - 83 */ + p_res3 : 12, /* 84 - 95 */ + pda_h : 32; /* 96 - 127 */ }; __u64 high; }; }; +#define PDA_LOW_BIT 26 +#define PDA_HIGH_BIT 32 + enum { IRQ_REMAP_XAPIC_MODE, IRQ_REMAP_X2APIC_MODE, -- cgit v1.2.3 From bf56027ff4d9e75bf668ae990fe6204d00a23002 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Jun 2015 13:20:30 +0800 Subject: iommu: dmar: Provide helper to copy shared irte fields Instead of open coding, provide a helper function to copy the shared irte fields. Signed-off-by: Thomas Gleixner Cc: jiang.liu@linux.intel.com Cc: iommu@lists.linux-foundation.org Cc: joro@8bytes.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-4-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- include/linux/dmar.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 0dbcabcb5f0d..e9bc9292bd3a 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -249,6 +249,18 @@ struct irte { }; }; +static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) +{ + dst->present = src->present; + dst->fpd = src->fpd; + dst->avail = src->avail; + dst->pst = src->pst; + dst->vector = src->vector; + dst->sid = src->sid; + dst->sq = src->sq; + dst->svt = src->svt; +} + #define PDA_LOW_BIT 26 #define PDA_HIGH_BIT 32 -- cgit v1.2.3 From 07c09787b26db724c94a912a572a9a4fa66008f3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 9 Jun 2015 13:20:34 +0800 Subject: iommu, x86: Add cap_pi_support() to detect VT-d PI capability Add helper function to detect VT-d Posted-Interrupts capability. Signed-off-by: Feng Wu Reviewed-by: Jiang Liu Reviewed-by: Thomas Gleixner Acked-by: David Woodhouse Acked-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: dwmw2@infradead.org Link: http://lkml.kernel.org/r/1433827237-3382-8-git-send-email-feng.wu@intel.com Signed-off-by: Thomas Gleixner --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0af9b03e2b1c..0c251be39836 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -87,6 +87,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* * Decoding Capability Register */ +#define cap_pi_support(c) (((c) >> 59) & 1) #define cap_read_drain(c) (((c) >> 55) & 1) #define cap_write_drain(c) (((c) >> 54) & 1) #define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -- cgit v1.2.3 From b6a00fae9760a49114016e4764d09e522a5ba5b6 Mon Sep 17 00:00:00 2001 From: Tim Kryger Date: Tue, 26 May 2015 13:08:16 -0700 Subject: pwm: Add pwmchip_add_with_polarity() API Add a new function to register a PWM chip with channels that have their initial polarity as specified by an additional parameter. This benefits drivers of controllers that by default operate with inversed polarity by removing the need to modify the polarity during initialization. Signed-off-by: Tim Kryger Signed-off-by: Jonathan Richardson [thierry.reding@gmail.com: export pwmchip_add_with_polarity()] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index cfe2d8df5be0..36262d08a9da 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -182,6 +182,8 @@ struct pwm_chip { int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); +int pwmchip_add_with_polarity(struct pwm_chip *chip, + enum pwm_polarity polarity); int pwmchip_add(struct pwm_chip *chip); int pwmchip_remove(struct pwm_chip *chip); struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, @@ -217,6 +219,11 @@ static inline int pwmchip_add(struct pwm_chip *chip) return -EINVAL; } +static inline int pwmchip_add_inversed(struct pwm_chip *chip) +{ + return -EINVAL; +} + static inline int pwmchip_remove(struct pwm_chip *chip) { return -EINVAL; -- cgit v1.2.3 From 22a10bca280073f81e9e2d9fed6f90a3bcf00236 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Jun 2015 17:37:03 -0700 Subject: regulator: Add system_load constraint Some regulators have a fixed load that isn't captured by consumers that the kernel knows about. Add a constraint to support this. Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index b07562e082c4..01526559c8c3 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -75,6 +75,7 @@ struct regulator_state { * * @min_uA: Smallest current consumers may set. * @max_uA: Largest current consumers may set. + * @system_load: Load that isn't captured by any consumer requests. * * @valid_modes_mask: Mask of modes which may be configured by consumers. * @valid_ops_mask: Operations which may be performed by consumers. @@ -112,6 +113,8 @@ struct regulation_constraints { int min_uA; int max_uA; + int system_load; + /* valid regulator operating modes for this machine */ unsigned int valid_modes_mask; -- cgit v1.2.3 From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:27:40 +0200 Subject: netfilter: bridge: detect NAT66 correctly and change MAC address IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-iptables=1 $ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This does not work with ip6tables on a bridge in NAT66 scenario because the REDIRECT/DNAT/SNAT is not correctly detected. The bridge pre-routing (finish) netfilter hook has to check for a possible redirect and then fix the destination mac address. This allows to use the ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4 iptables version. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-ip6tables=1 $ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested on a bridge with two interfaces using SNAT/DNAT NAT66 rules. Reported-by: Artie Hamilton Signed-off-by: Sven Eckelmann [bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()] [bernhard.thaler@wvnet.at: rebased, split into separate patches] Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 1 + include/linux/skbuff.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 64dad1cc1a4b..e2d19694ee8f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void); struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); + void (*route_input)(struct sk_buff *skb); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..f70fc0e6bf7b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -179,7 +180,10 @@ struct nf_bridge_info { struct net_device *physoutdev; char neigh_header[8]; }; - __be32 ipv4_daddr; + union { + __be32 ipv4_daddr; + struct in6_addr ipv6_daddr; + }; }; #endif -- cgit v1.2.3 From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:28:28 +0200 Subject: netfilter: bridge: refactor frag_max_size Currently frag_max_size is member of br_input_skb_cb and copied back and forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or used. Attach frag_max_size to nf_bridge_info and set value in pre_routing and forward functions. Use its value in forward and xmit functions. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f70fc0e6bf7b..32b105e682b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -174,6 +174,7 @@ struct nf_bridge_info { BRNF_PROTO_PPPOE } orig_proto:8; bool pkt_otherhost; + __u16 frag_max_size; unsigned int mask; struct net_device *physindev; union { -- cgit v1.2.3 From 23c779b9f9161d6568d3b2fca06e70ad182c480c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Jun 2015 17:37:04 -0700 Subject: regulator: Add pull down support Some regulators need to be configured to pull down a resistor when the regulator is disabled. Add an op (set_pull_down) and a DT property + constraint to support this. Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 5 +++++ include/linux/regulator/machine.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index fffa688ac3a7..76144a337ff7 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -121,6 +121,9 @@ struct regulator_linear_range { * @set_suspend_mode: Set the operating mode for the regulator when the * system is suspended. * + * @set_pull_down: Configure the regulator to pull down when the regulator + * is disabled. + * * This struct describes regulator operations which can be implemented by * regulator chip drivers. */ @@ -187,6 +190,8 @@ struct regulator_ops { /* set regulator suspend operating mode (defined in consumer.h) */ int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode); + + int (*set_pull_down) (struct regulator_dev *); }; /* diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 01526559c8c3..8ffb0619a03c 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -87,6 +87,7 @@ struct regulator_state { * applied. * @apply_uV: Apply the voltage constraint when initialising. * @ramp_disable: Disable ramp delay when initialising or when setting voltage. + * @pull_down: Enable pull down when regulator is disabled. * * @input_uV: Input voltage for regulator when supplied by another regulator. * @@ -141,6 +142,7 @@ struct regulation_constraints { unsigned boot_on:1; /* bootloader/firmware enabled regulator */ unsigned apply_uV:1; /* apply uV constraint if min == max */ unsigned ramp_disable:1; /* disable ramp delay */ + unsigned pull_down:1; /* pull down resistor when regulator off */ }; /** -- cgit v1.2.3 From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:30:16 +0200 Subject: netfilter: bridge: forward IPv6 fragmented packets IPv6 fragmented packets are not forwarded on an ethernet bridge with netfilter ip6_tables loaded. e.g. steps to reproduce 1) create a simple bridge like this modprobe br_netfilter brctl addbr br0 brctl addif br0 eth0 brctl addif br0 eth2 ifconfig eth0 up ifconfig eth2 up ifconfig br0 up 2) place a host with an IPv6 address on each side of the bridge set IPv6 address on host A: ip -6 addr add fd01:2345:6789:1::1/64 dev eth0 set IPv6 address on host B: ip -6 addr add fd01:2345:6789:1::2/64 dev eth0 3) run a simple ping command on host A with packets > MTU ping6 -s 4000 fd01:2345:6789:1::2 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge IPv6 fragmented packets traverse the bridge cleanly until somebody runs. "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are loaded) IPv6 fragmented packets do not traverse the bridge any more (you see no more responses in ping's output). After applying this patch IPv6 fragmented packets traverse the bridge cleanly in above scenario. Signed-off-by: Bernhard Thaler [pablo@netfilter.org: small changes to br_nf_dev_queue_xmit] Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index e2d19694ee8f..8b7d28f3aada 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -26,6 +26,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; -- cgit v1.2.3 From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2015 13:28:38 +0200 Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling since commit d6b915e29f4adea9 ("ip_fragment: don't forward defragmented DF packet") the largest fragment size is available in the IPCB. Therefore we no longer need to care about 'encapsulation' overhead of stripped PPPOE/VLAN headers since ip_do_fragment doesn't use device mtu in such cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f2fdb5a52070..6d80fc686323 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,13 +20,6 @@ enum nf_br_hook_priorities { #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) -{ - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) - return PPPOE_SES_HLEN; - return 0; -} - int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) -- cgit v1.2.3 From 57f66b78860968fc7eddc9ce25f8e57f7e5000bd Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Jun 2015 17:37:05 -0700 Subject: regulator: Add soft start support Some regulators support a "soft start" feature where the voltage ramps up slowly when the regulator is enabled. Add an op (set_soft_start) and a DT property + constraint to support this. Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ include/linux/regulator/machine.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 76144a337ff7..e0635d0894aa 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -161,6 +161,8 @@ struct regulator_ops { unsigned int old_selector, unsigned int new_selector); + int (*set_soft_start) (struct regulator_dev *); + /* report regulator status ... most other accessors report * control inputs, this reports results of combining inputs * from Linux (and other sources) with the actual load. diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 8ffb0619a03c..7f7d0a3fe1e1 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -142,6 +142,7 @@ struct regulation_constraints { unsigned boot_on:1; /* bootloader/firmware enabled regulator */ unsigned apply_uV:1; /* apply uV constraint if min == max */ unsigned ramp_disable:1; /* disable ramp delay */ + unsigned soft_start:1; /* ramp voltage slowly */ unsigned pull_down:1; /* pull down resistor when regulator off */ }; -- cgit v1.2.3 From 36e4f839de59b6216a16cdf5c1d3263f4dbd9421 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Jun 2015 17:37:06 -0700 Subject: regulator: Add input current limit support Some regulators can limit their input current (typically annotated as ilim). Add an op (set_input_current_limit) and a DT property + constraint to support this. Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 3 +++ include/linux/regulator/machine.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index e0635d0894aa..125264f8be93 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -91,6 +91,7 @@ struct regulator_linear_range { * @set_current_limit: Configure a limit for a current-limited regulator. * The driver should select the current closest to max_uA. * @get_current_limit: Get the configured limit for a current-limited regulator. + * @set_input_current_limit: Configure an input limit. * * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. @@ -145,6 +146,8 @@ struct regulator_ops { int min_uA, int max_uA); int (*get_current_limit) (struct regulator_dev *); + int (*set_input_current_limit) (struct regulator_dev *, int lim_uA); + /* enable/disable regulator */ int (*enable) (struct regulator_dev *); int (*disable) (struct regulator_dev *); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 7f7d0a3fe1e1..85a3b457de51 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -75,6 +75,7 @@ struct regulator_state { * * @min_uA: Smallest current consumers may set. * @max_uA: Largest current consumers may set. + * @ilim_uA: Maximum input current. * @system_load: Load that isn't captured by any consumer requests. * * @valid_modes_mask: Mask of modes which may be configured by consumers. @@ -113,6 +114,7 @@ struct regulation_constraints { /* current output range (inclusive) - for current control */ int min_uA; int max_uA; + int ilim_uA; int system_load; -- cgit v1.2.3 From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:54 +0200 Subject: netfilter: xtables: use percpu rule counters The binary arp/ip/ip6tables ruleset is stored per cpu. The only reason left as to why we need percpu duplication are the rule counters embedded into ipt_entry et al -- since each cpu has its own copy of the rules, all counters can be lockless. The downside is that the more cpus are supported, the more memory is required. Rules are not just duplicated per online cpu but for each possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times, not for the e.g. 64 cores present. To save some memory and also improve utilization of shared caches it would be preferable to only store the rule blob once. So we first need to separate counters and the rule blob. Instead of using entry->counters, allocate this percpu and store the percpu address in entry->counters.pcnt on CONFIG_SMP. This change makes no sense as-is; it is merely an intermediate step to remove the percpu duplication of the rule set in a followup patch. Suggested-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Reported-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 09f38206c18f..b77ab9f17641 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } + +/* On SMP, ip(6)t_entry->counters.pcnt holds address of the + * real (percpu) counter. On !SMP, its just the packet count, + * so nothing needs to be done there. + * + * xt_percpu_counter_alloc returns the address of the percpu + * counter, or 0 on !SMP. + * + * Hence caller must use IS_ERR_VALUE to check for error, this + * allows us to return 0 for single core systems without forcing + * callers to deal with SMP vs. NONSMP issues. + */ +static inline u64 xt_percpu_counter_alloc(void) +{ + if (nr_cpu_ids > 1) { + void __percpu *res = alloc_percpu(struct xt_counters); + + if (res == NULL) + return (u64) -ENOMEM; + + return (__force u64) res; + } + + return 0; +} +static inline void xt_percpu_counter_free(u64 pcnt) +{ + if (nr_cpu_ids > 1) + free_percpu((void __percpu *) pcnt); +} + +static inline struct xt_counters * +xt_get_this_cpu_counter(struct xt_counters *cnt) +{ + if (nr_cpu_ids > 1) + return this_cpu_ptr((void __percpu *) cnt->pcnt); + + return cnt; +} + +static inline struct xt_counters * +xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) +{ + if (nr_cpu_ids > 1) + return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + + return cnt; +} + struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); -- cgit v1.2.3 From 482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:55 +0200 Subject: netfilter: xtables: avoid percpu ruleset duplication We store the rule blob per (possible) cpu. Unfortunately this means we can waste lot of memory on big smp machines. ipt_entry structure ('rule head') is 112 byte, so e.g. with maxcpu=64 one single rule eats close to 8k RAM. Since previous patch made counters percpu it appears there is nothing left in the rule blob that needs to be percpu. On my test system (144 possible cpus, 400k dummy rules) this change saves close to 9 Gigabyte of RAM. Reported-by: Marcelo Ricardo Leitner Acked-by: Jesper Dangaard Brouer Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b77ab9f17641..9969d79dcde1 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -224,9 +224,9 @@ struct xt_table_info { unsigned int stacksize; unsigned int __percpu *stackptr; void ***jumpstack; - /* ipt_entry tables: one per CPU */ + /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries[1]; + void *entries; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ -- cgit v1.2.3 From 87d001ef5366c4a24f7a1340246c4ce68190581c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 27 May 2015 16:01:52 +0200 Subject: dmaengine: Move icg helpers to global header Now that we can have ICGs set for both the source and destination (using the icg field of struct data_chunk) or for only the source or the destination (using the dst_icg or src_icg respectively), and that these fields can be ignored depending on other parameters (src_inc, src_sgl, etc.), the logic to get the actual ICG value can be quite tricky. The XDMAC driver was already implementing it, but since we will need it in other drivers, we can move it to the main header file. Signed-off-by: Maxime Ripard Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..499c530bcbaa 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -874,6 +874,33 @@ static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) BUG(); } +static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg, + size_t dir_icg) +{ + if (inc) { + if (dir_icg) + return dir_icg; + else if (sgl) + return icg; + } + + return 0; +} + +static inline size_t dmaengine_get_dst_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->dst_inc, xt->dst_sgl, + chunk->icg, chunk->dst_icg); +} + +static inline size_t dmaengine_get_src_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->src_inc, xt->src_sgl, + chunk->icg, chunk->src_icg); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 4983a501afede12f95d26e1e213f8f2e9eda1871 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 18 May 2015 13:46:15 +0200 Subject: dmaengine: Revert "drivers/dma: remove unused support for MEMSET operations" This reverts commit 48a9db462d99494583dad829969616ac90a8df4e. Some platforms actually need support for the memset operations. Bring it back. Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..19face3168b4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -65,6 +65,7 @@ enum dma_transaction_type { DMA_PQ, DMA_XOR_VAL, DMA_PQ_VAL, + DMA_MEMSET, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -570,6 +571,7 @@ struct dma_tx_state { * @copy_align: alignment shift for memcpy operations * @xor_align: alignment shift for xor operations * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @src_addr_widths: bit mask of src addr widths the device supports @@ -588,6 +590,7 @@ struct dma_tx_state { * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation + * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -620,6 +623,7 @@ struct dma_device { u8 copy_align; u8 xor_align; u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -650,6 +654,9 @@ struct dma_device { struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( @@ -745,6 +752,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( return chan->device->device_prep_interleaved_dma(chan, xt, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags) +{ + if (!chan || !chan->device) + return NULL; + + return chan->device->device_prep_dma_memset(chan, dest, value, + len, flags); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct dma_chan *chan, struct scatterlist *dst_sg, unsigned int dst_nents, @@ -820,6 +838,12 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, return dmaengine_check_align(dev->pq_align, off1, off2, len); } +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { -- cgit v1.2.3 From 7bbf1dd24b17b9ec4f47c43ce4e05bf190745553 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:10 +0800 Subject: genirq: Enhance irq_data_to_desc() to support hierarchy irqdomain For irq associated with hierarchy irqdomains, there will be multiple irq_datas for one irq_desc. So enhance irq_data_to_desc() to support hierarchy irqdomain. Also export irq_data_to_desc() as an inline function for later reuse. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd1109fb241e..a113a8dc7438 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -93,6 +93,15 @@ struct irq_desc { extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) +{ +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irq_to_desc(data->irq); +#else + return container_of(data, struct irq_desc, irq_data); +#endif +} + static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; -- cgit v1.2.3 From 0d0b4c866bcce647f40d73efe5e90aeeb079050a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:12 +0800 Subject: genirq: Introduce struct irq_common_data to host shared irq data With the introduction of hierarchy irqdomain, struct irq_data becomes per-chip instead of per-irq and there may be multiple irq_datas associated with the same irq. Some per-irq data stored in struct irq_data now may get duplicated into multiple irq_datas, and causes inconsistent view. So introduce struct irq_common_data to host per-irq common data and to achieve consistent view among irq_chips. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 54 +++++++++++++++++++++++++++++-------------------- include/linux/irqdesc.h | 3 ++- 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 48cb7d1aa58f..3c7fbe44edae 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -126,13 +126,21 @@ struct msi_desc; struct irq_domain; /** - * struct irq_data - per irq and irq chip data passed down to chip functions + * struct irq_common_data - per irq data shared by all irqchips + * @state_use_accessors: status information for irq chip functions. + * Use accessor functions to deal with it + */ +struct irq_common_data { + unsigned int state_use_accessors; +}; + +/** + * struct irq_data - per irq chip data passed down to chip functions * @mask: precomputed bitmask for accessing the chip registers * @irq: interrupt number * @hwirq: hardware interrupt number, local to the interrupt domain * @node: node index useful for balancing - * @state_use_accessors: status information for irq chip functions. - * Use accessor functions to deal with it + * @common: point to data shared by all irqchips * @chip: low level interrupt hardware access * @domain: Interrupt translation domain; responsible for mapping * between hwirq number and linux irq number. @@ -153,7 +161,7 @@ struct irq_data { unsigned int irq; unsigned long hwirq; unsigned int node; - unsigned int state_use_accessors; + struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -166,7 +174,7 @@ struct irq_data { }; /* - * Bit masks for irq_data.state + * Bit masks for irq_common_data.state_use_accessors * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending @@ -198,34 +206,36 @@ enum { IRQD_WAKEUP_ARMED = (1 << 19), }; +#define __irqd_to_state(d) ((d)->common->state_use_accessors) + static inline bool irqd_is_setaffinity_pending(struct irq_data *d) { - return d->state_use_accessors & IRQD_SETAFFINITY_PENDING; + return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING; } static inline bool irqd_is_per_cpu(struct irq_data *d) { - return d->state_use_accessors & IRQD_PER_CPU; + return __irqd_to_state(d) & IRQD_PER_CPU; } static inline bool irqd_can_balance(struct irq_data *d) { - return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING)); + return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING)); } static inline bool irqd_affinity_was_set(struct irq_data *d) { - return d->state_use_accessors & IRQD_AFFINITY_SET; + return __irqd_to_state(d) & IRQD_AFFINITY_SET; } static inline void irqd_mark_affinity_was_set(struct irq_data *d) { - d->state_use_accessors |= IRQD_AFFINITY_SET; + __irqd_to_state(d) |= IRQD_AFFINITY_SET; } static inline u32 irqd_get_trigger_type(struct irq_data *d) { - return d->state_use_accessors & IRQD_TRIGGER_MASK; + return __irqd_to_state(d) & IRQD_TRIGGER_MASK; } /* @@ -233,43 +243,43 @@ static inline u32 irqd_get_trigger_type(struct irq_data *d) */ static inline void irqd_set_trigger_type(struct irq_data *d, u32 type) { - d->state_use_accessors &= ~IRQD_TRIGGER_MASK; - d->state_use_accessors |= type & IRQD_TRIGGER_MASK; + __irqd_to_state(d) &= ~IRQD_TRIGGER_MASK; + __irqd_to_state(d) |= type & IRQD_TRIGGER_MASK; } static inline bool irqd_is_level_type(struct irq_data *d) { - return d->state_use_accessors & IRQD_LEVEL; + return __irqd_to_state(d) & IRQD_LEVEL; } static inline bool irqd_is_wakeup_set(struct irq_data *d) { - return d->state_use_accessors & IRQD_WAKEUP_STATE; + return __irqd_to_state(d) & IRQD_WAKEUP_STATE; } static inline bool irqd_can_move_in_process_context(struct irq_data *d) { - return d->state_use_accessors & IRQD_MOVE_PCNTXT; + return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; } static inline bool irqd_irq_disabled(struct irq_data *d) { - return d->state_use_accessors & IRQD_IRQ_DISABLED; + return __irqd_to_state(d) & IRQD_IRQ_DISABLED; } static inline bool irqd_irq_masked(struct irq_data *d) { - return d->state_use_accessors & IRQD_IRQ_MASKED; + return __irqd_to_state(d) & IRQD_IRQ_MASKED; } static inline bool irqd_irq_inprogress(struct irq_data *d) { - return d->state_use_accessors & IRQD_IRQ_INPROGRESS; + return __irqd_to_state(d) & IRQD_IRQ_INPROGRESS; } static inline bool irqd_is_wakeup_armed(struct irq_data *d) { - return d->state_use_accessors & IRQD_WAKEUP_ARMED; + return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; } @@ -280,12 +290,12 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d) */ static inline void irqd_set_chained_irq_inprogress(struct irq_data *d) { - d->state_use_accessors |= IRQD_IRQ_INPROGRESS; + __irqd_to_state(d) |= IRQD_IRQ_INPROGRESS; } static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) { - d->state_use_accessors &= ~IRQD_IRQ_INPROGRESS; + __irqd_to_state(d) &= ~IRQD_IRQ_INPROGRESS; } static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a113a8dc7438..c52d1480f272 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -17,7 +17,7 @@ struct pt_regs; /** * struct irq_desc - interrupt descriptor - * @irq_data: per irq and chip data passed down to chip functions + * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu * @handle_irq: highlevel irq-events handler * @preflow_handler: handler called before the flow handler (currently used by sparc) @@ -47,6 +47,7 @@ struct pt_regs; * @name: flow handler name for /proc/interrupts output */ struct irq_desc { + struct irq_common_data irq_common_data; struct irq_data irq_data; unsigned int __percpu *kstat_irqs; irq_flow_handler_t handle_irq; -- cgit v1.2.3 From 6783011b48096b9a0c239d0f7645f93070b6eefd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:13 +0800 Subject: genirq: Introduce helper function irq_data_get_node() Introduce helper function irq_data_get_node() and variants thereof to hide struct irq_data implementation details. Convert the core code to use them. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433145945-789-5-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 3c7fbe44edae..b3b82a5344c8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -640,6 +640,11 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } +static inline int irq_data_get_node(struct irq_data *d) +{ + return d->node; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- cgit v1.2.3 From c64301a230a64dfc2fcf4581cd98a2d703f3c057 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:23 +0800 Subject: genirq: Introduce helper function irq_data_get_affinity_mask() Introduce helper function irq_data_get_affinity_mask() and irq_get_affinity_mask() to hide implementation details, so we could move field 'affinity' from struct irq_data into struct irq_common_data later. Signed-off-by: Jiang Liu Acked-by: Russell King Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433145945-789-15-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index b3b82a5344c8..1e0ccef205ed 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -645,6 +645,18 @@ static inline int irq_data_get_node(struct irq_data *d) return d->node; } +static inline struct cpumask *irq_get_affinity_mask(int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + + return d ? d->affinity : NULL; +} + +static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) +{ + return d->affinity; +} + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, -- cgit v1.2.3 From 7e53df111beea8db2543424d07bdee2a630698c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:04 -0400 Subject: xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Clean up: This field is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, -- cgit v1.2.3 From 52033cfb5aab2a54e238e93c9e52f61c2c5708aa Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 11 Jun 2015 16:35:26 +0300 Subject: IB/mlx4: Add mmap call to map the hardware clock In order to read the HCA's cycle counter efficiently in user space, we need to map the HCA's register. This is done through mmap call. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83e80ab94500..f94984fb8bb2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -829,6 +829,12 @@ struct mlx4_dev { struct mlx4_vf_dev *dev_vfs; }; +struct mlx4_clock_params { + u64 offset; + u8 bar; + u8 size; +}; + struct mlx4_eqe { u8 reserved1; u8 type; @@ -1485,4 +1491,7 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, enum mlx4_access_reg_method method, struct mlx4_ptys_reg *ptys_reg); +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params); + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From c0300089fd2dbeebef5ab9b6d66b4e6cedf8500a Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Tue, 28 Apr 2015 17:32:34 +0800 Subject: PCI: Remove unused pci_scan_bus_parented() No one uses pci_scan_bus_parented() any more, remove it. Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..1ec0d5d9723c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -773,8 +773,6 @@ void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res, void pcibios_scan_specific_bus(int busn); struct pci_bus *pci_find_bus(int domain, int busnr); void pci_bus_add_devices(const struct pci_bus *bus); -struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, - struct pci_ops *ops, void *sysdata); struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata); struct pci_bus *pci_create_root_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata, -- cgit v1.2.3 From 73b6ecdb93e8e77752cae9077c424fcdc6f23c39 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 12 Jun 2015 11:10:06 +0900 Subject: extcon: Redefine the unique id of supported external connectors without 'enum extcon' type This patch just redefine the unique id of supported external connectors without 'enum extcon' type. Because unique id would be used on devictree file(*.dts) to indicate the specific external connectors like key number of input framework. So, I have the plan to move this definitions to following header file which includes the unique id of supported external connectors. - include/dt-bindings/extcon/extcon.h Fixes: 2a9de9c0f08d ("extcon: Use the unique id for external connector instead of string") Signed-off-by: Chanwoo Choi Signed-off-by: Greg Kroah-Hartman --- include/linux/extcon.h | 90 +++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index a7b224b20ecc..b16d929fa75f 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -30,41 +30,35 @@ #include #include -enum extcon { - EXTCON_NONE = 0x0, - - /* USB external connector */ - EXTCON_USB = 0x1, - EXTCON_USB_HOST = 0x2, - - /* Charger external connector */ - EXTCON_TA = 0x10, - EXTCON_FAST_CHARGER = 0x11, - EXTCON_SLOW_CHARGER = 0x12, - EXTCON_CHARGE_DOWNSTREAM = 0x13, - - /* Audio/Video external connector */ - EXTCON_LINE_IN = 0x20, - EXTCON_LINE_OUT = 0x21, - EXTCON_MICROPHONE = 0x22, - EXTCON_HEADPHONE = 0x23, - - EXTCON_HDMI = 0x30, - EXTCON_MHL = 0x31, - EXTCON_DVI = 0x32, - EXTCON_VGA = 0x33, - EXTCON_SPDIF_IN = 0x34, - EXTCON_SPDIF_OUT = 0x35, - EXTCON_VIDEO_IN = 0x36, - EXTCON_VIDEO_OUT = 0x37, - - /* Etc external connector */ - EXTCON_DOCK = 0x50, - EXTCON_JIG = 0x51, - EXTCON_MECHANICAL = 0x52, - - EXTCON_END, -}; +/* + * Define the unique id of supported external connectors + */ +#define EXTCON_NONE 0 + +#define EXTCON_USB 1 /* USB connector */ +#define EXTCON_USB_HOST 2 + +#define EXTCON_TA 3 /* Charger connector */ +#define EXTCON_FAST_CHARGER 4 +#define EXTCON_SLOW_CHARGER 5 +#define EXTCON_CHARGE_DOWNSTREAM 6 + +#define EXTCON_LINE_IN 7 /* Audio/Video connector */ +#define EXTCON_LINE_OUT 8 +#define EXTCON_MICROPHONE 9 +#define EXTCON_HEADPHONE 10 +#define EXTCON_HDMI 11 +#define EXTCON_MHL 12 +#define EXTCON_DVI 13 +#define EXTCON_VGA 14 +#define EXTCON_SPDIF_IN 15 +#define EXTCON_SPDIF_OUT 16 +#define EXTCON_VIDEO_IN 17 +#define EXTCON_VIDEO_OUT 18 + +#define EXTCON_DOCK 19 /* Misc connector */ +#define EXTCON_JIG 20 +#define EXTCON_MECHANICAL 21 struct extcon_cable; @@ -105,7 +99,7 @@ struct extcon_cable; struct extcon_dev { /* Optional user initializing data */ const char *name; - const enum extcon *supported_cable; + const unsigned int *supported_cable; const u32 *mutually_exclusive; /* Optional callbacks to override class functions */ @@ -182,10 +176,10 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); /* * Following APIs control the memory of extcon device. */ -extern struct extcon_dev *extcon_dev_allocate(const enum extcon *cable); +extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); extern void extcon_dev_free(struct extcon_dev *edev); extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const enum extcon *cable); + const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* @@ -206,8 +200,8 @@ extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state); * get/set_cable_state access each bit of the 32b encoded state value. * They are used to access the status of each cable based on the cable_name. */ -extern int extcon_get_cable_state_(struct extcon_dev *edev, enum extcon id); -extern int extcon_set_cable_state_(struct extcon_dev *edev, enum extcon id, +extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id); +extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, bool cable_state); extern int extcon_get_cable_state(struct extcon_dev *edev, @@ -234,9 +228,9 @@ extern int extcon_unregister_interest(struct extcon_specific_cable_nb *nb); * we do not recommend to use this for normal 'notifiee' device drivers who * want to be notified by a specific external port of the notifier. */ -extern int extcon_register_notifier(struct extcon_dev *edev, enum extcon id, +extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); -extern int extcon_unregister_notifier(struct extcon_dev *edev, enum extcon id, +extern int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); /* @@ -266,7 +260,7 @@ static inline int devm_extcon_dev_register(struct device *dev, static inline void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev) { } -static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable) +static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) { return ERR_PTR(-ENOSYS); } @@ -274,7 +268,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const enum extcon *cable) static inline void extcon_dev_free(struct extcon_dev *edev) { } static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const enum extcon *cable) + const unsigned int *cable) { return ERR_PTR(-ENOSYS); } @@ -298,13 +292,13 @@ static inline int extcon_update_state(struct extcon_dev *edev, u32 mask, } static inline int extcon_get_cable_state_(struct extcon_dev *edev, - enum extcon id) + unsigned int id) { return 0; } static inline int extcon_set_cable_state_(struct extcon_dev *edev, - enum extcon id, bool cable_state) + unsigned int id, bool cable_state) { return 0; } @@ -327,14 +321,14 @@ static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) } static inline int extcon_register_notifier(struct extcon_dev *edev, - enum extcon id, + unsigned int id, struct notifier_block *nb) { return 0; } static inline int extcon_unregister_notifier(struct extcon_dev *edev, - enum extcon id, + unsigned int id, struct notifier_block *nb) { return 0; -- cgit v1.2.3 From ef73f886b53548d83d71a439f51a0c13ea6c1dae Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Thu, 28 May 2015 15:07:04 +0300 Subject: vme: export vme_check_window() Signed-off-by: Dmitry Kalinkin Cc: Igor Alekseev Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index 79242e9c06b8..c0131358f351 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -120,6 +120,8 @@ void vme_free_consistent(struct vme_resource *, size_t, void *, dma_addr_t); size_t vme_get_size(struct vme_resource *); +int vme_check_window(u32 aspace, unsigned long long vme_base, + unsigned long long size); struct vme_resource *vme_slave_request(struct vme_dev *, u32, u32); int vme_slave_set(struct vme_resource *, int, unsigned long long, -- cgit v1.2.3 From 1e98a0f08abddde87f0f93237f10629ecb4880ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:31:32 -0700 Subject: flow_dissector: fix ipv6 dst, hop-by-hop and routing ext hdrs __skb_header_pointer() returns a pointer that must be checked. Fixes infinite loop reported by Alexei, and add __must_check to catch these errors earlier. Fixes: 6a74fcf426f5 ("flow_dissector: add support for dst, hop-by-hop and routing ext hdrs") Reported-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..a7acc92aa668 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2743,8 +2743,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) +static inline void * __must_check +__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { if (hlen - offset >= len) return data + offset; @@ -2756,8 +2757,8 @@ static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, return buffer; } -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void * __must_check +skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); -- cgit v1.2.3 From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 21:07:54 +0200 Subject: netfilter: ipset: Use MSEC_PER_SEC consistently Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e0886c..3c8842bcedaa 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t) return; } - *timeout = msecs_to_jiffies(t * 1000) + jiffies; + *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; if (*timeout == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ (*timeout)--; @@ -71,7 +71,7 @@ static inline u32 ip_set_timeout_get(unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; } #endif /* __KERNEL__ */ -- cgit v1.2.3 From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 22:11:00 +0200 Subject: netfilter: ipset: Fix cidr handling for hash:*net* types Commit "Simplify cidr handling for hash:*net* types" broke the cidr handling for the hash:*net* types when the sets were used by the SET target: entries with invalid cidr values were added to the sets. Reported by Jonathan Johnson. Testsuite entry is added to verify the fix. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ffdfdc24952a..a6fe1ce96437 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (set)->timeout } -#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) - #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) -- cgit v1.2.3 From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 11:59:45 +0200 Subject: netfilter: ipset: Fix parallel resizing and listing of the same set When elements added to a hash:* type of set and resizing triggered, parallel listing could start to list the original set (before resizing) and "continue" with listing the new set. Fix it by references and using the original hash table for listing. Therefore the destroying of the original hash table may happen from the resizing or listing functions. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index a6fe1ce96437..5674b6ac6646 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -176,6 +176,9 @@ struct ip_set_type_variant { /* List elements */ int (*list)(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + /* Keep listing private when resizing runs parallel */ + void (*uref)(struct ip_set *set, struct netlink_callback *cb, + bool start); /* Return true if "b" set is the same as "a" * according to the create set parameters */ @@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter, /* Netlink CB args */ enum { - IPSET_CB_NET = 0, - IPSET_CB_DUMP, - IPSET_CB_INDEX, - IPSET_CB_ARG0, + IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_DUMP, /* dump single set/all sets */ + IPSET_CB_INDEX, /* set index */ + IPSET_CB_PRIVATE, /* set private data */ + IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG1, - IPSET_CB_ARG2, }; /* register and unregister set references */ -- cgit v1.2.3 From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:22:25 +0200 Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking accordingly. Convert the comment extension into an rcu-avare object. Also, simplify the timeout routines. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 9 ++++-- include/linux/netfilter/ipset/ip_set_comment.h | 38 ++++++++++++++++++-------- include/linux/netfilter/ipset/ip_set_timeout.h | 25 +++++++---------- 3 files changed, 44 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5674b6ac6646..19b4969a25fe 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -108,8 +108,13 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set_comment_rcu { + struct rcu_head rcu; + char str[0]; +}; + struct ip_set_comment { - char *str; + struct ip_set_comment_rcu __rcu *c; }; struct ip_set_skbinfo { @@ -226,7 +231,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 21217ea008d7..8d0248525957 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb) return nla_data(tb); } +/* Called from uadd only, protected by the set spinlock. + * The kadt functions don't use the comment extensions in any way. + */ static inline void ip_set_init_comment(struct ip_set_comment *comment, const struct ip_set_ext *ext) { + struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; - if (unlikely(comment->str)) { - kfree(comment->str); - comment->str = NULL; + if (unlikely(c)) { + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } if (!len) return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - comment->str = kzalloc(len + 1, GFP_ATOMIC); - if (unlikely(!comment->str)) + c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + if (unlikely(!c)) return; - strlcpy(comment->str, ext->comment, len + 1); + strlcpy(c->str, ext->comment, len + 1); + rcu_assign_pointer(comment->c, c); } +/* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) { - if (!comment->str) + struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + + if (!c) return 0; - return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); + return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); } +/* Called from uadd/udel, flush or the garbage collectors protected + * by the set spinlock. + * Called when the set is destroyed and when there can't be any user + * of the set data anymore. + */ static inline void ip_set_comment_free(struct ip_set_comment *comment) { - if (unlikely(!comment->str)) + struct ip_set_comment_rcu *c; + + c = rcu_dereference_protected(comment->c, 1); + if (unlikely(!c)) return; - kfree(comment->str); - comment->str = NULL; + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } #endif diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3c8842bcedaa..1d6a935c1ac5 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +ip_set_timeout_expired(unsigned long *t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); -} - -static inline bool -ip_set_timeout_expired(unsigned long *timeout) -{ - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { + unsigned long t; + + if (!value) { *timeout = IPSET_ELEM_PERMANENT; return; } - *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - (*timeout)--; + t--; + *timeout = t; } static inline u32 -- cgit v1.2.3 From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 19:45:33 +0200 Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 19b4969a25fe..48bb01edcf30 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask))) || (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); - } static inline void -- cgit v1.2.3 From c751ad0dd640f4ce9269acd7a54de5ba8092e99e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 12 Jun 2015 15:48:06 -0700 Subject: regulator: Add docbook for soft start The docbook for these members is missing. Add them. Warning(include/linux/regulator/machine.h:147): No description found for parameter 'soft_start' Warning(include/linux/regulator/driver.h:197): No description found for parameter 'set_soft_start' Reported-by: kbuild test robot Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 1 + include/linux/regulator/machine.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index e0635d0894aa..9398d31f9531 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -111,6 +111,7 @@ struct regulator_linear_range { * to stabilise after being set to a new value, in microseconds. * The function provides the from and to voltage selector, the * function should return the worst case. + * @set_soft_start: Enable soft start for the regulator. * * @set_suspend_voltage: Set the voltage for the regulator when the system * is suspended. diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 7f7d0a3fe1e1..1258275d3751 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -87,6 +87,7 @@ struct regulator_state { * applied. * @apply_uV: Apply the voltage constraint when initialising. * @ramp_disable: Disable ramp delay when initialising or when setting voltage. + * @soft_start: Enable soft start so that voltage ramps slowly. * @pull_down: Enable pull down when regulator is disabled. * * @input_uV: Input voltage for regulator when supplied by another regulator. -- cgit v1.2.3 From d45337328b1fb86a8f045f6c9938e9e08e6d7134 Mon Sep 17 00:00:00 2001 From: Vincent Wan Date: Thu, 11 Jun 2015 20:11:45 +0800 Subject: pci_ids: Add AMD KERNCZ device ID support The KERNCZ is new AMD SB/FCH generation name, like HUDSON2. 0x790b is the device ID for this generation. Signed-off-by: Wan ZongShun Acked-by: Bjorn Helgaas Signed-off-by: Ulf Hansson --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2f7b9a40f627..cb63a7b522ef 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -579,6 +579,7 @@ #define PCI_DEVICE_ID_AMD_HUDSON2_SATA_IDE 0x7800 #define PCI_DEVICE_ID_AMD_HUDSON2_SMBUS 0x780b #define PCI_DEVICE_ID_AMD_HUDSON2_IDE 0x780c +#define PCI_DEVICE_ID_AMD_KERNCZ_SMBUS 0x790b #define PCI_VENDOR_ID_TRIDENT 0x1023 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000 -- cgit v1.2.3 From 32be6d3e362b896c81aae7c635d44e5a91406ce2 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 12 Jun 2015 10:58:46 -0400 Subject: crypto: nx - move include/linux/nx842.h into drivers/crypto/nx/nx-842.h Move the contents of the include/linux/nx842.h header file into the drivers/crypto/nx/nx-842.h header file. Remove the nx842.h header file and its entry in the MAINTAINERS file. The include/linux/nx842.h header originally was there because the crypto/842.c driver needed it to communicate with the nx-842 hw driver. However, that crypto compression driver was moved into the drivers/crypto/nx/ directory, and now can directly include the nx-842.h header. Nothing else needs the public include/linux/nx842.h header file, as all use of the nx-842 hardware driver will be through the "842-nx" crypto compression driver, since the direct nx-842 api is very limited in the buffer alignments and sizes that it will accept, and the crypto compression interface handles those limitations and allows any alignment and size buffers. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- include/linux/nx842.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/nx842.h (limited to 'include/linux') diff --git a/include/linux/nx842.h b/include/linux/nx842.h deleted file mode 100644 index 4ddf68d9c0d4..000000000000 --- a/include/linux/nx842.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __NX842_H__ -#define __NX842_H__ - -#define __NX842_PSERIES_MEM_COMPRESS (10240) -#define __NX842_POWERNV_MEM_COMPRESS (1024) - -#define NX842_MEM_COMPRESS (max_t(unsigned int, \ - __NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS)) - -struct nx842_constraints { - int alignment; - int multiple; - int minimum; - int maximum; -}; - -int nx842_constraints(struct nx842_constraints *constraints); - -int nx842_compress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); -int nx842_decompress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); - -#endif -- cgit v1.2.3 From e7b707f96820161820a864d2ee81740a14da6b93 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Wed, 20 May 2015 11:31:27 +0200 Subject: mfd: cros_ec: Remove parent field Parent and device were pointing to the same device structure. Parent is unused, removed. Signed-off-by: Gwendal Grignou Tested-by: Stephen Barber Tested-by: Heiko Stuebner Tested-by: Gwendal Grignou Reviewed-by: Puthikorn Voravootivat Signed-off-by: Javier Martinez Canillas Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 324a34683971..14cf522123dd 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -85,7 +85,6 @@ struct cros_ec_command { * to using dword. * @din_size: size of din buffer to allocate (zero to use static din) * @dout_size: size of dout buffer to allocate (zero to use static dout) - * @parent: pointer to parent device (e.g. i2c or spi device) * @wake_enabled: true if this device can wake the system from sleep * @cmd_xfer: send command to EC and get response * Returns the number of bytes received if the communication succeeded, but @@ -113,7 +112,6 @@ struct cros_ec_device { uint8_t *dout; int din_size; int dout_size; - struct device *parent; bool wake_enabled; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); -- cgit v1.2.3 From a841178445bb72a3d566b4e6ab9d19e9b002eb47 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 9 Jun 2015 13:04:42 +0200 Subject: mfd: cros_ec: Use a zero-length array for command data Commit 1b84f2a4cd4a ("mfd: cros_ec: Use fixed size arrays to transfer data with the EC") modified the struct cros_ec_command fields to not use pointers for the input and output buffers and use fixed length arrays instead. This change was made because the cros_ec ioctl API uses that struct cros_ec_command to allow user-space to send commands to the EC and to get data from the EC. So using pointers made the API not 64-bit safe. Unfortunately this approach was not flexible enough for all the use-cases since there may be a need to send larger commands on newer versions of the EC command protocol. So to avoid to choose a constant length that it may be too big for most commands and thus wasting memory and CPU cycles on copy from and to user-space or having a size that is too small for some big commands, use a zero-length array that is both 64-bit safe and flexible. The same buffer is used for both output and input data so the maximum of these values should be used to allocate it. Suggested-by: Gwendal Grignou Signed-off-by: Javier Martinez Canillas Tested-by: Heiko Stuebner Acked-by: Lee Jones Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 14cf522123dd..7eee38abd02a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -42,8 +42,7 @@ enum { * @outsize: Outgoing length in bytes * @insize: Max number of bytes to accept from EC * @result: EC's response to the command (separate from communication failure) - * @outdata: Outgoing data to EC - * @indata: Where to put the incoming data from EC + * @data: Where to put the incoming data from EC and outgoing data to EC */ struct cros_ec_command { uint32_t version; @@ -51,8 +50,7 @@ struct cros_ec_command { uint32_t outsize; uint32_t insize; uint32_t result; - uint8_t outdata[EC_PROTO2_MAX_PARAM_SIZE]; - uint8_t indata[EC_PROTO2_MAX_PARAM_SIZE]; + uint8_t data[0]; }; /** -- cgit v1.2.3 From 256ab950bdaa8797b7bac8fc11a567030d486304 Mon Sep 17 00:00:00 2001 From: Stephen Barber Date: Tue, 9 Jun 2015 13:04:43 +0200 Subject: mfd: cros_ec: rev cros_ec_commands.h Update cros_ec_commands.h to the latest version in the EC firmware sources and add power domain and passthru commands. Also, update lightbar to use new command names. Signed-off-by: Stephen Barber Reviewed-by: Randall Spangler Signed-off-by: Javier Martinez Canillas Tested-by: Heiko Stuebner Reviewed-by: Gwendal Grignou Tested-by: Gwendal Grignou Acked-by: Lee Jones Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 277 ++++++++++++++++++++++++++++++++--- 1 file changed, 255 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index a49cd41feea7..13b630c10d4c 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -515,7 +515,7 @@ struct ec_host_response { /* * Notes on commands: * - * Each command is an 8-byte command value. Commands which take params or + * Each command is an 16-bit command value. Commands which take params or * return response data specify structs for that data. If no struct is * specified, the command does not input or output data, respectively. * Parameter/response length is implicit in the structs. Some underlying @@ -966,7 +966,7 @@ struct rgb_s { /* List of tweakable parameters. NOTE: It's __packed so it can be sent in a * host command, but the alignment is the same regardless. Keep it that way. */ -struct lightbar_params { +struct lightbar_params_v0 { /* Timing */ int32_t google_ramp_up; int32_t google_ramp_down; @@ -1000,32 +1000,81 @@ struct lightbar_params { struct rgb_s color[8]; /* 0-3 are Google colors */ } __packed; +struct lightbar_params_v1 { + /* Timing */ + int32_t google_ramp_up; + int32_t google_ramp_down; + int32_t s3s0_ramp_up; + int32_t s0_tick_delay[2]; /* AC=0/1 */ + int32_t s0a_tick_delay[2]; /* AC=0/1 */ + int32_t s0s3_ramp_down; + int32_t s3_sleep_for; + int32_t s3_ramp_up; + int32_t s3_ramp_down; + int32_t tap_tick_delay; + int32_t tap_display_time; + + /* Tap-for-battery params */ + uint8_t tap_pct_red; + uint8_t tap_pct_green; + uint8_t tap_seg_min_on; + uint8_t tap_seg_max_on; + uint8_t tap_seg_osc; + uint8_t tap_idx[3]; + + /* Oscillation */ + uint8_t osc_min[2]; /* AC=0/1 */ + uint8_t osc_max[2]; /* AC=0/1 */ + uint8_t w_ofs[2]; /* AC=0/1 */ + + /* Brightness limits based on the backlight and AC. */ + uint8_t bright_bl_off_fixed[2]; /* AC=0/1 */ + uint8_t bright_bl_on_min[2]; /* AC=0/1 */ + uint8_t bright_bl_on_max[2]; /* AC=0/1 */ + + /* Battery level thresholds */ + uint8_t battery_threshold[LB_BATTERY_LEVELS - 1]; + + /* Map [AC][battery_level] to color index */ + uint8_t s0_idx[2][LB_BATTERY_LEVELS]; /* AP is running */ + uint8_t s3_idx[2][LB_BATTERY_LEVELS]; /* AP is sleeping */ + + /* Color palette */ + struct rgb_s color[8]; /* 0-3 are Google colors */ +} __packed; + struct ec_params_lightbar { uint8_t cmd; /* Command (see enum lightbar_command) */ union { struct { /* no args */ - } dump, off, on, init, get_seq, get_params, version; + } dump, off, on, init, get_seq, get_params_v0, get_params_v1, + version, get_brightness, get_demo; - struct num { + struct { uint8_t num; - } brightness, seq, demo; + } set_brightness, seq, demo; - struct reg { + struct { uint8_t ctrl, reg, value; } reg; - struct rgb { + struct { uint8_t led, red, green, blue; - } rgb; + } set_rgb; + + struct { + uint8_t led; + } get_rgb; - struct lightbar_params set_params; + struct lightbar_params_v0 set_params_v0; + struct lightbar_params_v1 set_params_v1; }; } __packed; struct ec_response_lightbar { union { - struct dump { + struct { struct { uint8_t reg; uint8_t ic0; @@ -1033,20 +1082,26 @@ struct ec_response_lightbar { } vals[23]; } dump; - struct get_seq { + struct { uint8_t num; - } get_seq; + } get_seq, get_brightness, get_demo; - struct lightbar_params get_params; + struct lightbar_params_v0 get_params_v0; + struct lightbar_params_v1 get_params_v1; - struct version { + struct { uint32_t num; uint32_t flags; } version; + struct { + uint8_t red, green, blue; + } get_rgb; + struct { /* no return params */ - } off, on, init, brightness, seq, reg, rgb, demo, set_params; + } off, on, init, set_brightness, seq, reg, set_rgb, + demo, set_params_v0, set_params_v1; }; } __packed; @@ -1056,15 +1111,20 @@ enum lightbar_command { LIGHTBAR_CMD_OFF = 1, LIGHTBAR_CMD_ON = 2, LIGHTBAR_CMD_INIT = 3, - LIGHTBAR_CMD_BRIGHTNESS = 4, + LIGHTBAR_CMD_SET_BRIGHTNESS = 4, LIGHTBAR_CMD_SEQ = 5, LIGHTBAR_CMD_REG = 6, - LIGHTBAR_CMD_RGB = 7, + LIGHTBAR_CMD_SET_RGB = 7, LIGHTBAR_CMD_GET_SEQ = 8, LIGHTBAR_CMD_DEMO = 9, - LIGHTBAR_CMD_GET_PARAMS = 10, - LIGHTBAR_CMD_SET_PARAMS = 11, + LIGHTBAR_CMD_GET_PARAMS_V0 = 10, + LIGHTBAR_CMD_SET_PARAMS_V0 = 11, LIGHTBAR_CMD_VERSION = 12, + LIGHTBAR_CMD_GET_BRIGHTNESS = 13, + LIGHTBAR_CMD_GET_RGB = 14, + LIGHTBAR_CMD_GET_DEMO = 15, + LIGHTBAR_CMD_GET_PARAMS_V1 = 16, + LIGHTBAR_CMD_SET_PARAMS_V1 = 17, LIGHTBAR_NUM_CMDS }; @@ -1421,8 +1481,40 @@ struct ec_response_rtc { /*****************************************************************************/ /* Port80 log access */ +/* Maximum entries that can be read/written in a single command */ +#define EC_PORT80_SIZE_MAX 32 + /* Get last port80 code from previous boot */ #define EC_CMD_PORT80_LAST_BOOT 0x48 +#define EC_CMD_PORT80_READ 0x48 + +enum ec_port80_subcmd { + EC_PORT80_GET_INFO = 0, + EC_PORT80_READ_BUFFER, +}; + +struct ec_params_port80_read { + uint16_t subcmd; + union { + struct { + uint32_t offset; + uint32_t num_entries; + } read_buffer; + }; +} __packed; + +struct ec_response_port80_read { + union { + struct { + uint32_t writes; + uint32_t history_size; + uint32_t last_boot; + } get_info; + struct { + uint16_t codes[EC_PORT80_SIZE_MAX]; + } data; + }; +} __packed; struct ec_response_port80_last_boot { uint16_t code; @@ -1782,6 +1874,7 @@ struct ec_params_gpio_set { /* Get GPIO value */ #define EC_CMD_GPIO_GET 0x93 +/* Version 0 of input params and response */ struct ec_params_gpio_get { char name[32]; } __packed; @@ -1789,6 +1882,38 @@ struct ec_response_gpio_get { uint8_t val; } __packed; +/* Version 1 of input params and response */ +struct ec_params_gpio_get_v1 { + uint8_t subcmd; + union { + struct { + char name[32]; + } get_value_by_name; + struct { + uint8_t index; + } get_info; + }; +} __packed; + +struct ec_response_gpio_get_v1 { + union { + struct { + uint8_t val; + } get_value_by_name, get_count; + struct { + uint8_t val; + char name[32]; + uint32_t flags; + } get_info; + }; +} __packed; + +enum gpio_get_subcmd { + EC_GPIO_GET_BY_NAME = 0, + EC_GPIO_GET_COUNT = 1, + EC_GPIO_GET_INFO = 2, +}; + /*****************************************************************************/ /* I2C commands. Only available when flash write protect is unlocked. */ @@ -1857,13 +1982,21 @@ struct ec_params_charge_control { /*****************************************************************************/ /* - * Cut off battery power output if the battery supports. + * Cut off battery power immediately or after the host has shut down. * - * For unsupported battery, just don't implement this command and lets EC - * return EC_RES_INVALID_COMMAND. + * return EC_RES_INVALID_COMMAND if unsupported by a board/battery. + * EC_RES_SUCCESS if the command was successful. + * EC_RES_ERROR if the cut off command failed. */ + #define EC_CMD_BATTERY_CUT_OFF 0x99 +#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN (1 << 0) + +struct ec_params_battery_cutoff { + uint8_t flags; +} __packed; + /*****************************************************************************/ /* USB port mux control. */ @@ -2141,6 +2274,32 @@ struct ec_params_sb_wr_block { uint16_t data[32]; } __packed; +/*****************************************************************************/ +/* Battery vendor parameters + * + * Get or set vendor-specific parameters in the battery. Implementations may + * differ between boards or batteries. On a set operation, the response + * contains the actual value set, which may be rounded or clipped from the + * requested value. + */ + +#define EC_CMD_BATTERY_VENDOR_PARAM 0xb4 + +enum ec_battery_vendor_param_mode { + BATTERY_VENDOR_PARAM_MODE_GET = 0, + BATTERY_VENDOR_PARAM_MODE_SET, +}; + +struct ec_params_battery_vendor_param { + uint32_t param; + uint32_t value; + uint8_t mode; +} __packed; + +struct ec_response_battery_vendor_param { + uint32_t value; +} __packed; + /*****************************************************************************/ /* System commands */ @@ -2336,6 +2495,80 @@ struct ec_params_reboot_ec { #endif /* !__ACPI__ */ +/*****************************************************************************/ +/* + * PD commands + * + * These commands are for PD MCU communication. + */ + +/* EC to PD MCU exchange status command */ +#define EC_CMD_PD_EXCHANGE_STATUS 0x100 + +/* Status of EC being sent to PD */ +struct ec_params_pd_status { + int8_t batt_soc; /* battery state of charge */ +} __packed; + +/* Status of PD being sent back to EC */ +struct ec_response_pd_status { + int8_t status; /* PD MCU status */ + uint32_t curr_lim_ma; /* input current limit */ +} __packed; + +/* Set USB type-C port role and muxes */ +#define EC_CMD_USB_PD_CONTROL 0x101 + +enum usb_pd_control_role { + USB_PD_CTRL_ROLE_NO_CHANGE = 0, + USB_PD_CTRL_ROLE_TOGGLE_ON = 1, /* == AUTO */ + USB_PD_CTRL_ROLE_TOGGLE_OFF = 2, + USB_PD_CTRL_ROLE_FORCE_SINK = 3, + USB_PD_CTRL_ROLE_FORCE_SOURCE = 4, +}; + +enum usb_pd_control_mux { + USB_PD_CTRL_MUX_NO_CHANGE = 0, + USB_PD_CTRL_MUX_NONE = 1, + USB_PD_CTRL_MUX_USB = 2, + USB_PD_CTRL_MUX_DP = 3, + USB_PD_CTRL_MUX_DOCK = 4, + USB_PD_CTRL_MUX_AUTO = 5, +}; + +struct ec_params_usb_pd_control { + uint8_t port; + uint8_t role; + uint8_t mux; +} __packed; + +/*****************************************************************************/ +/* + * Passthru commands + * + * Some platforms have sub-processors chained to each other. For example. + * + * AP <--> EC <--> PD MCU + * + * The top 2 bits of the command number are used to indicate which device the + * command is intended for. Device 0 is always the device receiving the + * command; other device mapping is board-specific. + * + * When a device receives a command to be passed to a sub-processor, it passes + * it on with the device number set back to 0. This allows the sub-processor + * to remain blissfully unaware of whether the command originated on the next + * device up the chain, or was passed through from the AP. + * + * In the above example, if the AP wants to send command 0x0002 to the PD MCU, + * AP sends command 0x4002 to the EC + * EC sends command 0x0002 to the PD MCU + * EC forwards PD MCU response back to the AP + */ + +/* Offset and max command number for sub-device n */ +#define EC_CMD_PASSTHRU_OFFSET(n) (0x4000 * (n)) +#define EC_CMD_PASSTHRU_MAX(n) (EC_CMD_PASSTHRU_OFFSET(n) + 0x3fff) + /*****************************************************************************/ /* * Deprecated constants. These constants have been renamed for clarity. The -- cgit v1.2.3 From 2c7589af3c4dee844e6a4174f2aa8996cf837604 Mon Sep 17 00:00:00 2001 From: Stephen Barber Date: Tue, 9 Jun 2015 13:04:45 +0200 Subject: mfd: cros_ec: add proto v3 skeleton Add support in cros_ec.c to handle EC host command protocol v3. For v3+, probe for maximum shared protocol version and max request, response, and passthrough sizes. For now, this will always fall back to v2, since there is no bus-specific code for handling proto v3 packets. Signed-off-by: Stephen Barber Signed-off-by: Javier Martinez Canillas Reviewed-by: Gwendal Grignou Tested-by: Gwendal Grignou Tested-by: Heiko Stuebner Acked-by: Lee Jones Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7eee38abd02a..59d909434efd 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -21,6 +21,15 @@ #include #include +/* + * Max bus-specific overhead incurred by request/responses. + * I2C requires 1 additional byte for requests. + * I2C requires 2 additional bytes for responses. + * */ +#define EC_PROTO_VERSION_UNKNOWN 0 +#define EC_MAX_REQUEST_OVERHEAD 1 +#define EC_MAX_RESPONSE_OVERHEAD 2 + /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. */ @@ -88,6 +97,7 @@ struct cros_ec_command { * Returns the number of bytes received if the communication succeeded, but * that doesn't mean the EC was happy with the command. The caller * should check msg.result for the EC's result code. + * @pkt_xfer: send packet to EC and get response * @lock: one transaction at a time */ struct cros_ec_device { @@ -104,15 +114,21 @@ struct cros_ec_device { unsigned int bytes, void *dest); /* These are used to implement the platform-specific interface */ + u16 max_request; + u16 max_response; + u16 max_passthru; + u16 proto_version; void *priv; int irq; - uint8_t *din; - uint8_t *dout; + u8 *din; + u8 *dout; int din_size; int dout_size; bool wake_enabled; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); + int (*pkt_xfer)(struct cros_ec_device *ec, + struct cros_ec_command *msg); struct mutex lock; }; @@ -194,4 +210,12 @@ int cros_ec_remove(struct cros_ec_device *ec_dev); */ int cros_ec_register(struct cros_ec_device *ec_dev); +/** + * cros_ec_register - Query the protocol version supported by the ChromeOS EC + * + * @ec_dev: Device to register + * @return 0 if ok, -ve on error + */ +int cros_ec_query_all(struct cros_ec_device *ec_dev); + #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From d365407079d33106f76bd486a863de05eb5ae95d Mon Sep 17 00:00:00 2001 From: Stephen Barber Date: Tue, 9 Jun 2015 13:04:46 +0200 Subject: mfd: cros_ec: add bus-specific proto v3 code Add proto v3 support to the SPI, I2C, and LPC. Signed-off-by: Stephen Barber Signed-off-by: Javier Martinez Canillas Tested-by: Heiko Stuebner Reviewed-by: Gwendal Grignou Tested-by: Gwendal Grignou Acked-by: Lee Jones Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 59d909434efd..92e13aaa450c 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -21,6 +21,12 @@ #include #include +/* + * The EC is unresponsive for a time after a reboot command. Add a + * simple delay to make sure that the bus stays locked. + */ +#define EC_REBOOT_DELAY_MS 50 + /* * Max bus-specific overhead incurred by request/responses. * I2C requires 1 additional byte for requests. -- cgit v1.2.3 From 57b33ff077beebb68481a2b6b8e5fe58ca998169 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 9 Jun 2015 13:04:47 +0200 Subject: mfd: cros_ec: Support multiple EC in a system Chromebooks can have more than one Embedded Controller so the cros_ec device id has to be incremented for each EC registered. Add a new structure to represent multiple EC as different char devices (e.g: /dev/cros_ec, /dev/cros_pd). It connects to cros_ec_device and allows sysfs inferface for cros_pd. Also reduce number of allocated objects, make chromeos sysfs class object a static and add refcounting to prevent object deletion while command is in progress. Signed-off-by: Gwendal Grignou Reviewed-by: Dmitry Torokhov Signed-off-by: Javier Martinez Canillas Tested-by: Heiko Stuebner Acked-by: Lee Jones Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 92e13aaa450c..da72671a42fa 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -17,10 +17,14 @@ #define __LINUX_MFD_CROS_EC_H #include +#include #include #include #include +#define CROS_EC_DEV_NAME "cros_ec" +#define CROS_EC_DEV_PD_NAME "cros_pd" + /* * The EC is unresponsive for a time after a reboot command. Add a * simple delay to make sure that the bus stays locked. @@ -71,11 +75,8 @@ struct cros_ec_command { /** * struct cros_ec_device - Information about a ChromeOS EC device * - * @ec_name: name of EC device (e.g. 'chromeos-ec') * @phys_name: name of physical comms layer (e.g. 'i2c-4') * @dev: Device pointer for physical comms device - * @vdev: Device pointer for virtual comms device - * @cdev: Character device structure for virtual comms device * @was_wake_device: true if this device was set to wake the system from * sleep at the last suspend * @cmd_readmem: direct read of the EC memory-mapped region, if supported @@ -87,6 +88,7 @@ struct cros_ec_command { * * @priv: Private data * @irq: Interrupt to use + * @id: Device id * @din: input buffer (for data from EC) * @dout: output buffer (for data to EC) * \note @@ -109,11 +111,8 @@ struct cros_ec_command { struct cros_ec_device { /* These are used by other drivers that want to talk to the EC */ - const char *ec_name; const char *phys_name; struct device *dev; - struct device *vdev; - struct cdev cdev; bool was_wake_device; struct class *cros_class; int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset, @@ -138,6 +137,35 @@ struct cros_ec_device { struct mutex lock; }; +/* struct cros_ec_platform - ChromeOS EC platform information + * + * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...) + * used in /dev/ and sysfs. + * @cmd_offset: offset to apply for each command. Set when + * registering a devicde behind another one. + */ +struct cros_ec_platform { + const char *ec_name; + u16 cmd_offset; +}; + +/* + * struct cros_ec_dev - ChromeOS EC device entry point + * + * @class_dev: Device structure used in sysfs + * @cdev: Character device structure in /dev + * @ec_dev: cros_ec_device structure to talk to the physical device + * @dev: pointer to the platform device + * @cmd_offset: offset to apply for each command. + */ +struct cros_ec_dev { + struct device class_dev; + struct cdev cdev; + struct cros_ec_device *ec_dev; + struct device *dev; + u16 cmd_offset; +}; + /** * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device * @@ -224,4 +252,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev); */ int cros_ec_query_all(struct cros_ec_device *ec_dev); +/* sysfs stuff */ +extern struct attribute_group cros_ec_attr_group; +extern struct attribute_group cros_ec_lightbar_attr_group; + #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From d0562674838c08ff142c0e9a8e12634e133c4361 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 10 Jun 2015 11:08:52 -0500 Subject: ACPI / scan: Parse _CCA and setup device coherency This patch implements support for ACPI _CCA object, which is introduced in ACPIv5.1, can be used for specifying device DMA coherency attribute. The parsing logic traverses device namespace to parse coherency information, and stores it in acpi_device_flags. Then uses it to call arch_setup_dma_ops() when creating each device enumerated in DSDT during ACPI scan. This patch also introduces acpi_dma_is_coherent(), which provides an interface for device drivers to check the coherency information similarly to the of_dma_is_coherent(). Signed-off-by: Mark Salter Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..d46a48c21c67 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -569,6 +569,11 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) +{ + return false; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ -- cgit v1.2.3 From 05ca556003b1d6b4df0b8831e4c07fad7f5bdd2c Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 10 Jun 2015 11:08:54 -0500 Subject: device property: Introduces device_dma_is_coherent() Currently, device drivers, which support both OF and ACPI, need to call two separate APIs, of_dma_is_coherent() and acpi_dma_is_coherent()) to determine device coherency attribute. This patch simplifies this process by introducing a new device property API, device_dma_is_coherent(), which calls the appropriate interface based on the booting architecture. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index de8bdf417a35..76ebde9c11d4 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -164,4 +164,6 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); +bool device_dma_is_coherent(struct device *dev); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 09:57:30 -0700 Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference. After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore : Only one copy of table is kept, instead of one copy per cpu. We also can avoid a dereference if we put table data right after xt_table_info. It reduces register pressure and helps compiler. Then, we attempt a kmalloc() if total size is under order-3 allocation, to reduce TLB pressure, as in many cases, rules fit in 32 KB. Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9969d79dcde1..95693c4cebdd 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -225,12 +225,9 @@ struct xt_table_info { unsigned int __percpu *stackptr; void ***jumpstack; - /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries; + unsigned char entries[0] __aligned(8); }; -#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ - + nr_cpu_ids * sizeof(char *)) int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); -- cgit v1.2.3 From 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 15 Jun 2015 14:36:01 -0400 Subject: jbd2: fix ocfs2 corrupt when updating journal superblock fails If updating journal superblock fails after journal data has been flushed, the error is omitted and this will mislead the caller as a normal case. In ocfs2, the checkpoint will be treated successfully and the other node can get the lock to update. Since the sb_start is still pointing to the old log block, it will rewrite the journal data during journal recovery by the other node. Thus the new updates will be overwritten and ocfs2 corrupts. So in above case we have to return the error, and ocfs2_commit_cache will take care of the error and prevent the other node to do update first. And only after recovering journal it can do the new updates. The issue discussion mail can be found at: https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html http://comments.gmane.org/gmane.comp.file-systems.ext4/48841 [ Fixed bug in patch which allowed a non-negative error return from jbd2_cleanup_journal_tail() to leak out of jbd2_fjournal_flush(); this was causing xfstests ext4/306 to fail. -- Ted ] Reported-by: Yiwen Jiang Signed-off-by: Joseph Qi Signed-off-by: Theodore Ts'o Tested-by: Yiwen Jiang Cc: Junxiao Bi Cc: stable@vger.kernel.org --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 20e7f78041c8..edb640ae9a94 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, unsigned long *block); -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); /* Commit management */ @@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); -extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t, +extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, unsigned long, int); extern void __jbd2_journal_abort_hard (journal_t *); extern void jbd2_journal_abort (journal_t *, int); -- cgit v1.2.3 From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:12 -0700 Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors eBPF programs attached to kprobes need to filter based on current->pid, uid and other fields, so introduce helper functions: u64 bpf_get_current_pid_tgid(void) Return: current->tgid << 32 | current->pid u64 bpf_get_current_uid_gid(void) Return: current_gid << 32 | current_uid bpf_get_current_comm(char *buf, int size_of_buf) stores current->comm into buf They can be used from the programs attached to TC as well to classify packets based on current task fields. Update tracex2 example to print histogram of write syscalls for each process instead of aggregated for all. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2235aee8096a..1b9a3f5b27f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; +extern const struct bpf_func_proto bpf_get_current_comm_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 0756ea3e85139d23a8148ebaa95411c2f0aa4f11 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:13 -0700 Subject: bpf: allow networking programs to use bpf_trace_printk() for debugging bpf_trace_printk() is a helper function used to debug eBPF programs. Let socket and TC programs use it as well. Note, it's DEBUG ONLY helper. If it's used in the program, the kernel will print warning banner to make sure users don't use it in production. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b9a3f5b27f6..4383476a0d48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -150,6 +150,7 @@ struct bpf_array { u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); void bpf_prog_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +const struct bpf_func_proto *bpf_get_trace_printk_proto(void); #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); -- cgit v1.2.3 From 9464ca650008615d28d9aaf7de99b4edf61f0109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:44:48 -0700 Subject: net: make u64_stats_init() a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a function instead of a macro is cleaner and remove following W=1 warnings (extract) In file included from net/ipv6/ip6_vti.c:29:0: net/ipv6/ip6_vti.c: In function ‘vti6_dev_init_gen’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ net/ipv6/ip6_vti.c:862:16: note: in expansion of macro ‘netdev_alloc_pcpu_stats’ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ^ CC [M] net/ipv6/sit.o In file included from net/ipv6/sit.c:30:0: net/ipv6/sit.c: In function ‘ipip6_tunnel_init’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 4b4439e75f45..df89c9bcba7d 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -68,11 +68,12 @@ struct u64_stats_sync { }; +static inline void u64_stats_init(struct u64_stats_sync *syncp) +{ #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) -# define u64_stats_init(syncp) seqcount_init(syncp.seq) -#else -# define u64_stats_init(syncp) do { } while (0) + seqcount_init(&syncp->seq); #endif +} static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From 47d8417f5914012c794684f651213ffae1b91619 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:58:58 +0300 Subject: net/mlx4_core: Add sink counter Reserve the last valid counter index for "sink" counter, when a new counter cannot be allocated, the driver will use this counter. In order to avoid allocating this counter on any other flow, fix the indices bitmap allocation range, and reserve the sink counter index. Add macro for the sink counter index and replace all appearences of the index with the macro. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ad31e476873f..312c50420dde 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -957,6 +957,7 @@ struct mlx4_mad_ifc { ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) #define MLX4_INVALID_SLAVE_ID 0xFF +#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) void handle_port_mgmt_change_event(struct work_struct *work); -- cgit v1.2.3 From 6de5f7f6a1fa2288552d46b3effbb6d5571413e5 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:02 +0300 Subject: net/mlx4_core: Allocate default counter per port Default counter per port will be allocated at the mlx4 core driver load. Every QP opened by the Ethernet driver will be attached to the port's default counter. This is an infrastructure step to collect VF statistics from the PF. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 312c50420dde..4820080ac394 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1348,6 +1348,7 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port); -- cgit v1.2.3 From 9616982f3fcc9e6577d7f41009c4ef2df19a71ec Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:05 +0300 Subject: net/mlx4_core: Add helper to query counters This is an infrastructure step for querying VF and PF counters. This code was in the IB driver, move it to the mlx4 core driver so it will be accessible for more use cases. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 3 +++ include/linux/mlx4/device.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index f62e7cf227c6..5dffc869988b 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -35,6 +35,7 @@ #include #include +#include enum { /* initialization and general commands */ @@ -300,6 +301,8 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4820080ac394..efe80c754b2f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -771,6 +771,14 @@ union mlx4_ext_av { struct mlx4_eth_av eth; }; +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do { \ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_counter { u8 reserved1[3]; u8 counter_mode; -- cgit v1.2.3 From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:07 +0300 Subject: net/core: Add reading VF statistics through the PF netdevice Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic statistics. We encode the VF stats in a nested manner to allow for future extensions. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/if_link.h | 9 +++++++++ include/linux/netdevice.h | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index da4929927f69..ae5d0d22955d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -5,6 +5,15 @@ /* We don't want this structure exposed to user space */ +struct ifla_vf_stats { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 broadcast; + __u64 multicast; +}; + struct ifla_vf_info { __u32 vf; __u8 mac[32]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f5f71ff5169..e20979dfd6a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1100,6 +1100,10 @@ struct net_device_ops { struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); + int (*ndo_get_vf_stats)(struct net_device *dev, + int vf, + struct ifla_vf_stats + *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); -- cgit v1.2.3 From 62a890557f57e6cbebe9cc6c32aef045405d4fa2 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:08 +0300 Subject: net/mlx4_en: Support ndo_get_vf_stats Implement the ndo to gather VF statistics through the PF. All counters related to this VF are stored in a per slave list, run over the slave's list and collect all statistics. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 5dffc869988b..58391f2e0414 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -36,6 +36,7 @@ #include #include #include +#include enum { /* initialization and general commands */ @@ -303,6 +304,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, struct mlx4_counter *counter_stats, int reset); +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); -- cgit v1.2.3 From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:18 -0400 Subject: sock_diag: define destruction multicast groups These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleanup the socket, a unit of work will be added to a private work queue which will first broadcast information about the socket and then finish the cleanup operation. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 083ac388098e..fddebc617469 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,7 +1,10 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include #include +#include +#include #include struct sk_buff; @@ -11,6 +14,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*get_info)(struct sk_buff *skb, struct sock *sk); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +static inline +enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + case AF_INET6: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET6_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET6_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + default: + return SKNLGRP_NONE; + } +} + +static inline +bool sock_diag_has_destroy_listeners(const struct sock *sk) +{ + const struct net *n = sock_net(sk); + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + + return group != SKNLGRP_NONE && n->diag_nlsk && + netlink_has_listeners(n->diag_nlsk, group); +} +void sock_diag_broadcast_destroy(struct sock *sk); + #endif -- cgit v1.2.3 From 3fd22af808f4d7455ba91596d334438c7ee0f889 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:19 -0400 Subject: sock_diag: specify info_size per inet protocol Previously, there was no clear distinction between the inet protocols that used struct tcp_info to report information and those that didn't. This change adds a specific size attribute to the inet_diag_handler struct which defines these interfaces. This will make dispatching sock_diag get_info requests identical for all inet protocols in a following patch. Tested: ss -au Tested: ss -at Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index ac48b10c9395..0e707f0c1a3e 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,7 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); __u16 idiag_type; + __u16 idiag_info_size; }; struct inet_connection_sock; -- cgit v1.2.3 From d37e296979ed1652aec6850e2d736bd0ebf0cdb1 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 15 Jun 2015 13:18:36 -0700 Subject: MPILIB: add mpi_read_buf() and mpi_get_size() helpers Added a mpi_read_buf() helper function to export MPI to a buf provided by the user, and a mpi_get_size() helper, that tells the user how big the buf is. Changed mpi_free to use kzfree instead of kfree because it is used to free crypto keys. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- include/linux/mpi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 5af1b81def49..641b7d6fd096 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -81,6 +81,8 @@ MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); int mpi_fromstr(MPI val, const char *str); u32 mpi_get_keyid(MPI a, u32 *keyid); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); +int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, + int *sign); void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign); @@ -142,4 +144,17 @@ int mpi_rshift(MPI x, MPI a, unsigned n); /*-- mpi-inv.c --*/ int mpi_invm(MPI x, MPI u, MPI v); +/* inline functions */ + +/** + * mpi_get_size() - returns max size required to store the number + * + * @a: A multi precision integer for which we want to allocate a bufer + * + * Return: size required to store the number + */ +static inline unsigned int mpi_get_size(MPI a) +{ + return a->nlimbs * BYTES_PER_MPI_LIMB; +} #endif /*G10_MPI_H */ -- cgit v1.2.3 From c7cfc94096db28d3072b402c224eb50349926e24 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:10 +0800 Subject: genirq: Enhance irq_data_to_desc() to support hierarchy irqdomain For irq associated with hierarchy irqdomains, there will be multiple irq_datas for one irq_desc. So enhance irq_data_to_desc() to support hierarchy irqdomain. Also export irq_data_to_desc() as an inline function for later reuse. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd1109fb241e..a113a8dc7438 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -93,6 +93,15 @@ struct irq_desc { extern struct irq_desc irq_desc[NR_IRQS]; #endif +static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) +{ +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irq_to_desc(data->irq); +#else + return container_of(data, struct irq_desc, irq_data); +#endif +} + static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; -- cgit v1.2.3 From 4158c2eca3c77ed3cccdcaeab153aad4e433369c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Jun 2015 10:14:02 +0200 Subject: iommu/vt-d: Detect pre enabled translation Add code to detect whether translation is already enabled in the IOMMU. Save this state in a flags field added to struct intel_iommu. Tested-by: ZhenHua Li Tested-by: Baoquan He Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a240e61a7700..b85b81ad5eba 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -320,6 +320,9 @@ enum { MAX_SR_DMAR_REGS }; +#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -351,6 +354,7 @@ struct intel_iommu { #endif struct device *iommu_dev; /* IOMMU-sysfs device */ int node; + u32 flags; /* Software defined flags */ }; static inline void __iommu_flush_cache( -- cgit v1.2.3 From af3b358e48115588d905cc07a47b3f356e0d01d1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Jun 2015 15:00:21 +0200 Subject: iommu/vt-d: Copy IR table from old kernel when in kdump mode When we are booting into a kdump kernel and find IR enabled, copy over the contents of the previous IR table so that spurious interrupts will not be target aborted. Tested-by: ZhenHua Li Tested-by: Baoquan He Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index b85b81ad5eba..9e14edcf7f6e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -296,6 +296,7 @@ struct q_inval { /* 1MB - maximum possible interrupt remapping table size */ #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf #define INTR_REMAP_TABLE_ENTRIES 65536 -- cgit v1.2.3 From 764ad8ba8cd4c6f836fca9378f8c5121aece0842 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:56 -0400 Subject: nfs: increase size of EXCHANGE_ID name string buffer The current buffer is much too small if you have a relatively long hostname. Bring it up to the size of the one that SETCLIENTID has. Cc: Reported-by: Michael Skralivetsky Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..e9e9a8dcfb47 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; -- cgit v1.2.3 From 3a6bb738792500e8b4534c0350c13a132bac0492 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:57 -0400 Subject: nfs: convert setclientid and exchange_id encoders to use clp->cl_owner_id ...instead of buffers that are part of their arg structs. We already hold a reference to the client, so we might as well use the allocated buffer. In the event that we can't allocate the clp->cl_owner_id, then just return -ENOMEM. Note too that we switch from a GFP_KERNEL allocation here to GFP_NOFS. It's possible we could end up trying to do a SETCLIENTID or EXCHANGE_ID in order to reclaim some memory, and the GFP_KERNEL allocations in the existing code could cause recursion back into NFS reclaim. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9e9a8dcfb47..6c0b423d781b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -994,7 +994,7 @@ struct nfs4_setclientid { char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; -- cgit v1.2.3 From 873e385116b2cc5c7daca8f51881371fadb90970 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:44:00 -0400 Subject: nfs: make nfs4_init_uniform_client_string use a dynamically allocated buffer Change the uniform client string generator to dynamically allocate the NFSv4 client name string buffer. With this patch, we can eliminate the buffers that are embedded within the "args" structs and simply use the name string that is hanging off the client. This uniform string case is a little simpler than the nonuniform since we don't need to deal with RCU, but we do have two different cases, depending on whether there is a uniquifier or not. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6c0b423d781b..c959e7eb5bd4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -984,11 +984,8 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; @@ -1142,12 +1139,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3 From 22c1587adfed1977d26a57ac2831d03e37f8f805 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 27 Apr 2015 17:37:53 -0400 Subject: init: delete the __cpuinit related stubs The __cpuinit support was removed several releases ago in 3.11-rc1 with commit 22f0a27367742f65130c0fb25ef00f7297e032c1 ("init.h: remove __cpuinit sections from the kernel") People have had a chance to update their out of tree code, so now we remove the no-op stubs to ensure no more new use cases can creep back in. Also delete the mention of __cpuinitdata from the tag script. Signed-off-by: Paul Gortmaker --- include/linux/init.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 21b6d768edd7..7c68c36d3fd8 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -91,14 +91,6 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* temporary, until all users are removed */ -#define __cpuinit -#define __cpuinitdata -#define __cpuinitconst -#define __cpuexit -#define __cpuexitdata -#define __cpuexitconst - /* Used for MEMORY_HOTPLUG */ #define __meminit __section(.meminit.text) __cold notrace #define __meminitdata __section(.meminit.data) @@ -116,9 +108,6 @@ #define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous -/* temporary, until all users are removed */ -#define __CPUINIT - #define __MEMINIT .section ".meminit.text", "ax" #define __MEMINITDATA .section ".meminit.data", "aw" #define __MEMINITRODATA .section ".meminit.rodata", "a" -- cgit v1.2.3 From f309d4443130bf814e991f836e919dca22df37ae Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:10:57 -0400 Subject: platform_device: better support builtin boilerplate avoidance We have macros that help reduce the boilerplate for modules that register with no extra init/exit complexity other than the most standard use case. However we see an increasing number of non-modular drivers using these modular_driver() type register functions. There are several downsides to this: 1) The code can appear modular to a reader of the code, and they won't know if the code really is modular without checking the Makefile and Kconfig to see if compilation is governed by a bool or tristate. 2) Coders of drivers may be tempted to code up an __exit function that is never used, just in order to satisfy the required three args of the modular registration function. 3) Non-modular code ends up including the which increases CPP overhead that they don't need. 4) It hinders us from performing better separation of the module init code and the generic init code. Here we introduce similar macros, with the mapping from module_driver to builtin_driver and similar, so that simple changes of: module_platform_driver() ---> builtin_platform_driver() module_platform_driver_probe() ---> builtin_platform_driver_probe(). can help us avoid #3 above, without having to code up the same __init functions and device_initcall() boilerplate. For non modular code, module_init becomes __initcall. But direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of device_initcall directly in this change means that the runtime impact is zero -- drivers will remain at level 6 in the initcall ordering. Cc: Greg Kroah-Hartman Signed-off-by: Paul Gortmaker --- include/linux/device.h | 22 ++++++++++++++++++++++ include/linux/platform_device.h | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6558af90c8fe..c2d6167cb4a3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1269,4 +1269,26 @@ static void __exit __driver##_exit(void) \ } \ module_exit(__driver##_exit); +/** + * builtin_driver() - Helper macro for drivers that don't do anything + * special in init and have no exit. This eliminates some boilerplate. + * Each driver may only use this macro once, and calling it replaces + * device_initcall (or in some cases, the legacy __initcall). This is + * meant to be a direct parallel of module_driver() above but without + * the __exit stuff that is not used for builtin cases. + * + * @__driver: driver name + * @__register: register function for this driver type + * @...: Additional arguments to be passed to __register + * + * Use this macro to construct bus specific macros for registering + * drivers, and do not use it on its own. + */ +#define builtin_driver(__driver, __register, ...) \ +static int __init __driver##_init(void) \ +{ \ + return __register(&(__driver) , ##__VA_ARGS__); \ +} \ +device_initcall(__driver##_init); + #endif /* _DEVICE_H_ */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 58f1e75ba105..bba08f44cc97 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -222,6 +222,15 @@ static inline void platform_set_drvdata(struct platform_device *pdev, module_driver(__platform_driver, platform_driver_register, \ platform_driver_unregister) +/* builtin_platform_driver() - Helper macro for builtin drivers that + * don't do anything special in driver init. This eliminates some + * boilerplate. Each driver may only use this macro once, and + * calling it replaces device_initcall(). Note this is meant to be + * a parallel of module_platform_driver() above, but w/o _exit stuff. + */ +#define builtin_platform_driver(__platform_driver) \ + builtin_driver(__platform_driver, platform_driver_register) + /* module_platform_driver_probe() - Helper macro for drivers that don't do * anything special in module init/exit. This eliminates a lot of * boilerplate. Each module may only use this macro once, and @@ -240,6 +249,20 @@ static void __exit __platform_driver##_exit(void) \ } \ module_exit(__platform_driver##_exit); +/* builtin_platform_driver_probe() - Helper macro for drivers that don't do + * anything special in device init. This eliminates some boilerplate. Each + * driver may only use this macro once, and using it replaces device_initcall. + * This is meant to be a parallel of module_platform_driver_probe above, but + * without the __exit parts. + */ +#define builtin_platform_driver_probe(__platform_driver, __platform_probe) \ +static int __init __platform_driver##_init(void) \ +{ \ + return platform_driver_probe(&(__platform_driver), \ + __platform_probe); \ +} \ +device_initcall(__platform_driver##_init); \ + #define platform_create_bundle(driver, probe, res, n_res, data, size) \ __platform_create_bundle(driver, probe, res, n_res, data, size, THIS_MODULE) extern struct platform_device *__platform_create_bundle( -- cgit v1.2.3 From fec47d863587c272d6fbf4e50066209c953d5e60 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:27 -0500 Subject: remoteproc: introduce rproc_get_by_phandle API Allow users of remoteproc the ability to get a handle to an rproc by passing a phandle supplied in the user's device tree node. This is useful in situations that require manual booting of the rproc. This patch uses the code removed by commit 40e575b1d0b3 ("remoteproc: remove the get_by_name/put API") for the ref counting but is modified to use a simple list and locking mechanism and has rproc_get_by_name replaced with an rproc_get_by_phandle API. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna [fix order of Signed-off-by tags] Signed-off-by: Ohad Ben-Cohen --- include/linux/remoteproc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 78b8a9b9d40a..56739e5df1e6 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -36,11 +36,11 @@ #define REMOTEPROC_H #include -#include #include #include #include #include +#include /** * struct resource_table - firmware resource table header @@ -375,7 +375,7 @@ enum rproc_crash_type { /** * struct rproc - represents a physical remote processor device - * @node: klist node of this rproc object + * @node: list node of this rproc object * @domain: iommu domain * @name: human readable name of the rproc * @firmware: name of firmware file to be loaded @@ -407,7 +407,7 @@ enum rproc_crash_type { * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { - struct klist_node node; + struct list_head node; struct iommu_domain *domain; const char *name; const char *firmware; @@ -481,6 +481,7 @@ struct rproc_vdev { u32 rsc_offset; }; +struct rproc *rproc_get_by_phandle(phandle phandle); struct rproc *rproc_alloc(struct device *dev, const char *name, const struct rproc_ops *ops, const char *firmware, int len); -- cgit v1.2.3 From 9f3520c3115b451ac1301779fc3c769d94907a70 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 8 May 2015 18:19:05 +1000 Subject: wait: introduce wait_event_exclusive_cmd It's just a variant of wait_event_cmd(), with exclusive flag being set. For cases like RAID5, which puts many processes to sleep until 1/4 resources are free, a wake_up wakes up all processes to run, but there is one process being able to get the resource as it's protected by a spin lock. That ends up introducing heavy lock contentions, and hurts performance badly. Here introduce wait_event_exclusive_cmd to relieve the lock contention naturally by letting wake_up just wake up one process. Cc: Ingo Molnar Cc: Peter Zijlstra v2: its assumed that wait*() and __wait*() have the same arguments - peterz Acked-by: Peter Zijlstra (Intel) Signed-off-by: Yuanhan Liu Signed-off-by: NeilBrown --- include/linux/wait.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 2db83349865b..db78c7204947 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -358,6 +358,19 @@ do { \ __ret; \ }) +#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ + cmd1; schedule(); cmd2) +/* + * Just like wait_event_cmd(), except it sets exclusive flag + */ +#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ +} while (0) + #define __wait_event_cmd(wq, condition, cmd1, cmd2) \ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ cmd1; schedule(); cmd2) -- cgit v1.2.3 From a01f7cd657c95941079d548ef7fbf0cc370c1259 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 22 May 2015 15:45:28 -0500 Subject: remoteproc: add a rproc ops for performing address translation The rproc_da_to_va API is currently used to perform any device to kernel address translations to meet the different needs of the remoteproc core/drivers (eg: loading). The functionality is achieved within the remoteproc core, and is limited only for carveouts allocated within the core. A new rproc ops, da_to_va, is added to provide flexibility to platform implementations to perform the address translation themselves when the above conditions cannot be met by the implementations. The rproc_da_to_va() API is extended to invoke this ops if present, and fallback to regular processing if the platform implementation cannot provide the translation. This will allow any remoteproc implementations to translate addresses for dedicated memories like internal memories. While at this, also update the rproc_da_to_va() documentation since it is an exported function. Signed-off-by: Suman Anna Signed-off-by: Dave Gerlach Signed-off-by: Ohad Ben-Cohen --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 56739e5df1e6..9c4e1384f636 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -330,11 +330,13 @@ struct rproc; * @start: power on the device and boot it * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) + * @da_to_va: optional platform hook to perform address translations */ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); + void * (*da_to_va)(struct rproc *rproc, u64 da, int len); }; /** -- cgit v1.2.3 From a01bc0d5f557bd8becd0ba75d09c39192004697e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:30 -0500 Subject: remoteproc/wkup_m3: add a remoteproc driver for TI Wakeup M3 Add a remoteproc driver to load the firmware and boot a small Wakeup M3 processor present on TI AM33xx and AM43xx SoCs. This Wakeup M3 remote processor is an integrated Cortex M3 that allows the SoC to enter the lowest possible power state by taking control from the MPU after it has gone into its own low power state and shutting off any additional peripherals. The Wakeup M3 processor has two internal memory regions - 16 kB of unified instruction memory called UMEM used to store executable code, and 8 kB of data memory called DMEM used for all data sections. The Wakeup M3 processor executes its code entirely from within the UMEM and uses the DMEM for any data. It does not use any external memory or any other external resources. The device address view has the UMEM at address 0x0 and DMEM at address 0x80000, and these are computed automatically within the driver based on relative address calculation from the corresponding device tree IOMEM resources. These device addresses are used to aid the core remoteproc ELF loader code to properly translate and load the firmware segments through the .rproc_da_to_va ops. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna Signed-off-by: Ohad Ben-Cohen --- include/linux/platform_data/wkup_m3.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/platform_data/wkup_m3.h (limited to 'include/linux') diff --git a/include/linux/platform_data/wkup_m3.h b/include/linux/platform_data/wkup_m3.h new file mode 100644 index 000000000000..3f1d77effd71 --- /dev/null +++ b/include/linux/platform_data/wkup_m3.h @@ -0,0 +1,30 @@ +/* + * TI Wakeup M3 remote processor platform data + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_PLATFORM_DATA_WKUP_M3_H +#define _LINUX_PLATFORM_DATA_WKUP_M3_H + +struct platform_device; + +struct wkup_m3_platform_data { + const char *reset_name; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); +}; + +#endif /* _LINUX_PLATFORM_DATA_WKUP_M3_H */ -- cgit v1.2.3 From 3c339ab83fc09d9d91fb7e8b4a60e8ddc91de417 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 16 Jun 2015 10:30:55 -0700 Subject: crypto: akcipher - add PKE API Add Public Key Encryption API. Signed-off-by: Tadeusz Struk Made CRYPTO_AKCIPHER invisible like other type config options. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 1 + include/linux/cryptouser.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 25a4b71d6d1f..0e3f71a73e3b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -53,6 +53,7 @@ #define CRYPTO_ALG_TYPE_SHASH 0x00000009 #define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_RNG 0x0000000c +#define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_PCOMPRESS 0x0000000f #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h index 4abf2ea6a887..36efbbbf2f83 100644 --- a/include/linux/cryptouser.h +++ b/include/linux/cryptouser.h @@ -43,6 +43,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_COMPRESS, /* struct crypto_report_comp */ CRYPTOCFGA_REPORT_RNG, /* struct crypto_report_rng */ CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ + CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -101,5 +102,9 @@ struct crypto_report_rng { unsigned int seedsize; }; +struct crypto_report_akcipher { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 46b15caa7cb19b0f6e3bc8ebaee5bc1bb2e35110 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 16 Jun 2015 18:48:31 -0400 Subject: vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB FS_CGROUP_WRITEBACK indicates whether a file_system_type supports cgroup writeback; however, different super_blocks of the same file_system_type may or may not support cgroup writeback depending on filesystem options. This patch replaces FS_CGROUP_WRITEBACK with a per-super_block flag. super_block->s_flags carries some internal flags in the high bits but it's exposd to userland through uapi header and running out of space anyway. This patch adds a new field super_block->s_iflags to carry kernel-internal flags. It is currently only used by the new SB_I_CGROUPWB flag whose concatenated and abbreviated name is for consistency with other super_block flags. ext2_fill_super() is updated to set SB_I_CGROUPWB. v2: Added super_block->s_iflags instead of stealing another high bit from sb->s_flags as suggested by Christoph and Jan. Signed-off-by: Tejun Heo Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig Cc: Jan Kara Cc: linux-ext4@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- include/linux/fs.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index dfce80869145..a13181a42b9a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -260,7 +260,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && - (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK); + (inode->i_sb->s_iflags & SB_I_CGROUPWB); } /** diff --git a/include/linux/fs.h b/include/linux/fs.h index b5e1dcfbc5e3..2c5e33a5b2af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1241,6 +1241,8 @@ struct mm_struct; #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ +/* sb->s_iflags */ +#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ /* Possible states of 'frozen' field */ enum { @@ -1279,6 +1281,7 @@ struct super_block { const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; + unsigned long s_iflags; /* internal SB_I_* flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; @@ -1912,7 +1915,6 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ -#define FS_CGROUP_WRITEBACK 32 /* Supports cgroup-aware writeback */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); -- cgit v1.2.3 From a9bd32a8b4c4c2670f9ed8cae63f9378b6df3ded Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 13 Mar 2015 11:09:45 -0700 Subject: msm: msm_fb: Remove dead code This code is no longer used now that mach-msm has been removed. Delete it. Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: David Brown Cc: Bryan Huntsman Cc: Daniel Walker Signed-off-by: Stephen Boyd Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Tomi Valkeinen --- include/linux/platform_data/video-msm_fb.h | 146 ----------------------------- 1 file changed, 146 deletions(-) delete mode 100644 include/linux/platform_data/video-msm_fb.h (limited to 'include/linux') diff --git a/include/linux/platform_data/video-msm_fb.h b/include/linux/platform_data/video-msm_fb.h deleted file mode 100644 index 31449be3eadb..000000000000 --- a/include/linux/platform_data/video-msm_fb.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Internal shared definitions for various MSM framebuffer parts. - * - * Copyright (C) 2007 Google Incorporated - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _MSM_FB_H_ -#define _MSM_FB_H_ - -#include - -struct mddi_info; - -struct msm_fb_data { - int xres; /* x resolution in pixels */ - int yres; /* y resolution in pixels */ - int width; /* disply width in mm */ - int height; /* display height in mm */ - unsigned output_format; -}; - -struct msmfb_callback { - void (*func)(struct msmfb_callback *); -}; - -enum { - MSM_MDDI_PMDH_INTERFACE, - MSM_MDDI_EMDH_INTERFACE, - MSM_EBI2_INTERFACE, -}; - -#define MSMFB_CAP_PARTIAL_UPDATES (1 << 0) - -struct msm_panel_data { - /* turns off the fb memory */ - int (*suspend)(struct msm_panel_data *); - /* turns on the fb memory */ - int (*resume)(struct msm_panel_data *); - /* turns off the panel */ - int (*blank)(struct msm_panel_data *); - /* turns on the panel */ - int (*unblank)(struct msm_panel_data *); - void (*wait_vsync)(struct msm_panel_data *); - void (*request_vsync)(struct msm_panel_data *, struct msmfb_callback *); - void (*clear_vsync)(struct msm_panel_data *); - /* from the enum above */ - unsigned interface_type; - /* data to be passed to the fb driver */ - struct msm_fb_data *fb_data; - - /* capabilities supported by the panel */ - uint32_t caps; -}; - -struct msm_mddi_client_data { - void (*suspend)(struct msm_mddi_client_data *); - void (*resume)(struct msm_mddi_client_data *); - void (*activate_link)(struct msm_mddi_client_data *); - void (*remote_write)(struct msm_mddi_client_data *, uint32_t val, - uint32_t reg); - uint32_t (*remote_read)(struct msm_mddi_client_data *, uint32_t reg); - void (*auto_hibernate)(struct msm_mddi_client_data *, int); - /* custom data that needs to be passed from the board file to a - * particular client */ - void *private_client_data; - struct resource *fb_resource; - /* from the list above */ - unsigned interface_type; -}; - -struct msm_mddi_platform_data { - unsigned int clk_rate; - void (*power_client)(struct msm_mddi_client_data *, int on); - - /* fixup the mfr name, product id */ - void (*fixup)(uint16_t *mfr_name, uint16_t *product_id); - - struct resource *fb_resource; /*optional*/ - /* number of clients in the list that follows */ - int num_clients; - /* array of client information of clients */ - struct { - unsigned product_id; /* mfr id in top 16 bits, product id - * in lower 16 bits - */ - char *name; /* the device name will be the platform - * device name registered for the client, - * it should match the name of the associated - * driver - */ - unsigned id; /* id for mddi client device node, will also - * be used as device id of panel devices, if - * the client device will have multiple panels - * space must be left here for them - */ - void *client_data; /* required private client data */ - unsigned int clk_rate; /* optional: if the client requires a - * different mddi clk rate - */ - } client_platform_data[]; -}; - -struct mdp_blit_req; -struct fb_info; -struct mdp_device { - struct device dev; - void (*dma)(struct mdp_device *mpd, uint32_t addr, - uint32_t stride, uint32_t w, uint32_t h, uint32_t x, - uint32_t y, struct msmfb_callback *callback, int interface); - void (*dma_wait)(struct mdp_device *mdp); - int (*blit)(struct mdp_device *mdp, struct fb_info *fb, - struct mdp_blit_req *req); - void (*set_grp_disp)(struct mdp_device *mdp, uint32_t disp_id); -}; - -struct class_interface; -int register_mdp_client(struct class_interface *class_intf); - -/**** private client data structs go below this line ***/ - -struct msm_mddi_bridge_platform_data { - /* from board file */ - int (*init)(struct msm_mddi_bridge_platform_data *, - struct msm_mddi_client_data *); - int (*uninit)(struct msm_mddi_bridge_platform_data *, - struct msm_mddi_client_data *); - /* passed to panel for use by the fb driver */ - int (*blank)(struct msm_mddi_bridge_platform_data *, - struct msm_mddi_client_data *); - int (*unblank)(struct msm_mddi_bridge_platform_data *, - struct msm_mddi_client_data *); - struct msm_fb_data fb_data; -}; - - - -#endif -- cgit v1.2.3 From 208489032bdd8d4a7de50f3057c175058f271956 Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Mon, 15 Jun 2015 19:20:48 +0800 Subject: mmc: mediatek: Add Mediatek MMC driver Add Mediatek MMC driver code Support eMMC/SD/SDIO Signed-off-by: Chaotian Jing Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index de722d4e9d61..258daf914c6d 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -121,6 +121,7 @@ struct mmc_data { struct mmc_request *mrq; /* associated request */ unsigned int sg_len; /* size of scatter list */ + int sg_count; /* mapped sg entries */ struct scatterlist *sg; /* I/O scatter list */ s32 host_cookie; /* host private data */ }; -- cgit v1.2.3 From a1a56aaa0735c7821e85c37268a7c12e132415cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 18:10:13 -0700 Subject: netfilter: x_tables: align per cpu xt_counter Let's force a 16 bytes alignment on xt_counter percpu allocations, so that bytes and packets sit in same cache line. xt_counter being exported to user space, we cannot add __align(16) on the structure itself. Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 95693c4cebdd..1c97a2204379 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -356,7 +356,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * so nothing needs to be done there. * * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. + * counter, or 0 on !SMP. We force an alignment of 16 bytes + * so that bytes/packets share a common cache line. * * Hence caller must use IS_ERR_VALUE to check for error, this * allows us to return 0 for single core systems without forcing @@ -365,7 +366,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, static inline u64 xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { - void __percpu *res = alloc_percpu(struct xt_counters); + void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), + sizeof(struct xt_counters)); if (res == NULL) return (u64) -ENOMEM; -- cgit v1.2.3 From 3b0f95be143bea1aa47beb20134ef82e4e4068dc Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 16 Jun 2015 23:06:20 +0100 Subject: irq: Add irq_set_chained_handler_and_data() Driver authors seem to get the ordering of irq_set_chained_handler() and irq_set_handler_data() wrong - ordering the former before the latter. This opens a race window where, if there is an interrupt pending, the handler will be called between these two calls, potentially resulting in an oops. Provide a single interface to set both of these together, especially as that's commonly what is required. Signed-off-by: Russell King Cc: Alexandre Courbot Cc: Hans Ulli Kroll Cc: Jason Cooper Cc: Lee Jones Cc: Linus Walleij Cc: Thierry Reding Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/E1Z4yzs-0002Rw-4B@rmk-PC.arm.linux.org.uk Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index de3213d271ff..42861d28fc2a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -525,6 +525,15 @@ irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) __irq_set_handler(irq, handle, 1, NULL); } +/* + * Set a highlevel chained flow handler and its data for a given IRQ. + * (a chained handler is automatically enabled and set to + * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) + */ +void +irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, + void *data); + void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); static inline void irq_set_status_flags(unsigned int irq, unsigned long set) -- cgit v1.2.3 From 0f1b414d190724617eb1cdd615592fa8cd9d0b50 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jun 2015 18:32:02 +0200 Subject: ACPI / PNP: Avoid conflicting resource reservations Commit b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" overlooked the fact that the memory and/or I/O regions reserved by acpi_reserve_resources() may conflict with those reserved by the PNP "system" driver. If that conflict actually takes place, it causes the reservations made by the "system" driver to fail while before commit b9a5e5e18fbf all reservations made by it and by acpi_reserve_resources() would be successful. In turn, that allows the resources that haven't been reserved by the "system" driver to be used by others (e.g. PCI) which sometimes leads to functional problems (up to and including boot failures). To fix that issue, introduce a common resource reservation routine, acpi_reserve_region(), to be used by both acpi_reserve_resources() and the "system" driver, that will track all resources reserved by it and avoid making conflicting requests. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..299763df1b27 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -332,6 +332,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); +int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, + unsigned long flags, char *desc); + #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -525,6 +528,13 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } +static inline int acpi_reserve_region(u64 start, unsigned int length, + u8 space_id, unsigned long flags, + char *desc) +{ + return -ENXIO; +} + struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) -- cgit v1.2.3 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/linux/netfilter.h | 6 ++---- include/linux/netfilter_defs.h | 9 +++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 include/linux/netfilter_defs.h (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f5ff5d156da8..00050dfd9f23 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -10,7 +10,8 @@ #include #include #include -#include +#include + #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) { @@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, int netfilter_init(void); -/* Largest hook number + 1 */ -#define NF_MAX_HOOKS 8 - struct sk_buff; struct nf_hook_ops; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h new file mode 100644 index 000000000000..d3a7f8597e82 --- /dev/null +++ b/include/linux/netfilter_defs.h @@ -0,0 +1,9 @@ +#ifndef __LINUX_NETFILTER_CORE_H_ +#define __LINUX_NETFILTER_CORE_H_ + +#include + +/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ +#define NF_MAX_HOOKS 8 + +#endif -- cgit v1.2.3 From dcb8f5c8139ef945cdfd55900fae265c4dbefc02 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Jun 2015 23:58:28 +0200 Subject: netfilter: xtables: fix warnings on 32bit platforms On 32bit archs gcc complains due to cast from void* to u64. Add intermediate casts to long to silence these warnings. include/linux/netfilter/x_tables.h:376:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] include/linux/netfilter/x_tables.h:384:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:391:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:400:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fixes: 71ae0dff02d756e ("netfilter: xtables: use percpu rule counters") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1c97a2204379..286098a5667f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -372,7 +372,7 @@ static inline u64 xt_percpu_counter_alloc(void) if (res == NULL) return (u64) -ENOMEM; - return (__force u64) res; + return (u64) (__force unsigned long) res; } return 0; @@ -380,14 +380,14 @@ static inline u64 xt_percpu_counter_alloc(void) static inline void xt_percpu_counter_free(u64 pcnt) { if (nr_cpu_ids > 1) - free_percpu((void __percpu *) pcnt); + free_percpu((void __percpu *) (unsigned long) pcnt); } static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) - return this_cpu_ptr((void __percpu *) cnt->pcnt); + return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } @@ -396,7 +396,7 @@ static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) - return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } -- cgit v1.2.3 From fb02915f47181e824339d91f8e385fd4bd746d6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 18 Jun 2015 16:54:28 -0400 Subject: kernfs: make kernfs_get_inode() public Move kernfs_get_inode() prototype from fs/kernfs/kernfs-internal.h to include/linux/kernfs.h. It obtains the matching inode for a kernfs_node. It will be used by cgroup for inode based permission checks for now but is generally useful. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..e6b2f7db9c0c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -277,6 +277,7 @@ void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); +struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); @@ -352,6 +353,10 @@ static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } +static inline struct inode * +kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) +{ return NULL; } + static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) -- cgit v1.2.3 From 187fe84067bd377047cfcb7f2bbc7c9dc12d290c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 18 Jun 2015 16:54:28 -0400 Subject: cgroup: require write perm on common ancestor when moving processes on the default hierarchy On traditional hierarchies, if a task has write access to "tasks" or "cgroup.procs" file of a cgroup and its euid agrees with the target, it can move the target to the cgroup; however, consider the following scenario. The owner of each cgroup is in the parentheses. R (root) - 0 (root) - 00 (user1) - 000 (user1) | \ 001 (user1) \ 1 (root) - 10 (user1) The subtrees of 00 and 10 are delegated to user1; however, while both subtrees may belong to the same user, it is clear that the two subtrees are to be isolated - they're under completely separate resource limits imposed by 0 and 1, respectively. Note that 0 and 1 aren't strictly necessary but added to ease illustrating the issue. If user1 is allowed to move processes between the two subtrees, the intention of the hierarchy - keeping a given group of processes under a subtree with certain resource restrictions while delegating management of the subtree - can be circumvented by user1. This happens because migration permission check doesn't consider the hierarchical nature of cgroups. To fix the issue, this patch adds an extra permission requirement when userland tries to migrate a process in the default hierarchy - the issuing task must have write access to the common ancestor of "cgroup.procs" file of the ancestor in addition to the destination's. Conceptually, the issuer must be able to move the target process from the source cgroup to the common ancestor of source and destination cgroups and then to the destination. As long as delegation is done in a proper top-down way, this guarantees that a delegatee can't smuggle processes across disjoint delegation domains. The next patch will add documentation on the delegation model on the default hierarchy. v2: Fixed missing !ret test. Spotted by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Cc: Li Zefan --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c5588c438448..93755a629299 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -220,6 +220,7 @@ struct cgroup { int populated_cnt; struct kernfs_node *kn; /* cgroup kernfs entry */ + struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ /* -- cgit v1.2.3 From c04dca02bc73096435a5c36efd5ccb2171edcbe1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 11 Jun 2015 14:46:44 +0200 Subject: hrtimer: Remove HRTIMER_STATE_MIGRATE I do not understand HRTIMER_STATE_MIGRATE. Unless I am totally confused it looks buggy and simply unneeded. migrate_hrtimer_list() sets it to keep hrtimer_active() == T, but this is not enough: this can fool, say, hrtimer_is_queued() in dequeue_signal(). Can't migrate_hrtimer_list() simply use HRTIMER_STATE_ENQUEUED? This fixes the race and we can kill STATE_MIGRATE. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: wanpeng.li@linux.intel.com Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.072387650@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3f82a7edc03d..2f9e57d3d126 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -70,17 +70,13 @@ enum hrtimer_restart { * the handling of the timer. * * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This - * also affects HRTIMER_STATE_MIGRATE where the preservation is not - * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is - * enqueued on the new cpu. + * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 -#define HRTIMER_STATE_MIGRATE 0x04 /** * struct hrtimer - the basic hrtimer structure -- cgit v1.2.3 From a7c6f571ff51cc77d90dd54968f7c5c938c43998 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:46 +0200 Subject: seqcount: Rename write_seqcount_barrier() I'll shortly be introducing another seqcount primitive that's useful to provide ordering semantics and would like to use the write_seqcount_barrier() name for that. Seeing how there's only one user of the current primitive, lets rename it to invalidate, as that appears what its doing. While there, employ lockdep_assert_held() instead of assert_spin_locked() to not generate debug code for regular kernels. Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: Oleg Nesterov Cc: wanpeng.li@linux.intel.com Cc: Paul McKenney Cc: Al Viro Cc: Linus Torvalds Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.279926217@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391ce..c07e3a536099 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,13 +266,13 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * write_seqcount_barrier - invalidate in-progress read-side seq operations + * write_seqcount_invalidate - invalidate in-progress read-side seq operations * @s: pointer to seqcount_t * - * After write_seqcount_barrier, no read-side seq operations will complete + * After write_seqcount_invalidate, no read-side seq operations will complete * successfully and see data older than this. */ -static inline void write_seqcount_barrier(seqcount_t *s) +static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); s->sequence+=2; -- cgit v1.2.3 From c4bfa3f5f906aee2e084c5b1fb15caf876338ef8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2015 14:29:24 +0200 Subject: seqcount: Introduce raw_write_seqcount_barrier() Introduce raw_write_seqcount_barrier(), a new construct that can be used to provide write barrier semantics in seqcount read loops instead of the usual consistency guarantee. raw_write_seqcount_barier() is equivalent to: raw_write_seqcount_begin(); raw_write_seqcount_end(); But avoids issueing two back-to-back smp_wmb() instructions. This construct works because the read side will 'stall' when observing odd values. This means that -- referring to the example in the comment below -- even though there is no (matching) read barrier between the loads of X and Y, we cannot observe !x && !y, because: - if we observe Y == false we must observe the first sequence increment, which makes us loop, until - we observe !(seq & 1) -- the second sequence increment -- at which time we must also observe T == true. Suggested-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: umgwanakikbuti@gmail.com Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: oleg@redhat.com Cc: wanpeng.li@linux.intel.com Cc: Al Viro Cc: Linus Torvalds Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/20150617122924.GP3644@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/seqlock.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index c07e3a536099..486e685a226a 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -233,6 +233,47 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } +/** + * raw_write_seqcount_barrier - do a seq write barrier + * @s: pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the + * usual consistency guarantee. It is one wmb cheaper, because we can + * collapse the two back-to-back wmb()s. + * + * seqcount_t seq; + * bool X = true, Y = false; + * + * void read(void) + * { + * bool x, y; + * + * do { + * int s = read_seqcount_begin(&seq); + * + * x = X; y = Y; + * + * } while (read_seqcount_retry(&seq, s)); + * + * BUG_ON(!x && !y); + * } + * + * void write(void) + * { + * Y = true; + * + * raw_write_seqcount_barrier(seq); + * + * X = false; + * } + */ +static inline void raw_write_seqcount_barrier(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); + s->sequence++; +} + /* * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t -- cgit v1.2.3 From 887d9dc989eb0154492e41e7c07492edbb088ba1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:48 +0200 Subject: hrtimer: Allow hrtimer::function() to free the timer Currently an hrtimer callback function cannot free its own timer because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK after it. Freeing the timer would result in a clear use-after-free. Solve this by using a scheme similar to regular timers; track the current running timer in hrtimer_clock_base::running. Suggested-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: wanpeng.li@linux.intel.com Cc: Al Viro Cc: Linus Torvalds Cc: Paul McKenney Cc: Oleg Nesterov Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2f9e57d3d126..5db055821ef3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -53,30 +53,25 @@ enum hrtimer_restart { * * 0x00 inactive * 0x01 enqueued into rbtree - * 0x02 callback function running - * 0x04 timer is migrated to another cpu * - * Special cases: - * 0x03 callback function running and enqueued - * (was requeued on another CPU) - * 0x05 timer was migrated on CPU hotunplug + * The callback state is not part of the timer->state because clearing it would + * mean touching the timer after the callback, this makes it impossible to free + * the timer from the callback function. * - * The "callback function running and enqueued" status is only possible on - * SMP. It happens for example when a posix timer expired and the callback + * Therefore we track the callback state in: + * + * timer->base->cpu_base->running == timer + * + * On SMP it is possible to have a "callback function running and enqueued" + * status. It happens for example when a posix timer expired and the callback * queued a signal. Between dropping the lock which protects the posix timer * and reacquiring the base lock of the hrtimer, another CPU can deliver the - * signal and rearm the timer. We have to preserve the callback running state, - * as otherwise the timer could be removed before the softirq code finishes the - * the handling of the timer. - * - * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. + * signal and rearm the timer. * * All state transitions are protected by cpu_base->lock. */ #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 -#define HRTIMER_STATE_CALLBACK 0x02 /** * struct hrtimer - the basic hrtimer structure @@ -163,6 +158,8 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events @@ -184,6 +181,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; + seqcount_t seq; + struct hrtimer *running; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -391,15 +390,7 @@ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern u64 hrtimer_get_next_event(void); -/* - * A timer is active, when it is enqueued into the rbtree or the - * callback function is running or it's in the state of being migrated - * to another cpu. - */ -static inline int hrtimer_active(const struct hrtimer *timer) -{ - return timer->state != HRTIMER_STATE_INACTIVE; -} +extern bool hrtimer_active(const struct hrtimer *timer); /* * Helper function to check, whether the timer is on one of the queues @@ -415,7 +406,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_CALLBACK; + return timer->base->cpu_base->running == timer; } /* Forward a hrtimer so it expires after now: */ -- cgit v1.2.3 From a24fc60d63da2b0b31bf7c876d12a51ed4b778bd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2015 14:46:53 +0200 Subject: lockdep: Implement lock pinning Add a lockdep annotation that WARNs if you 'accidentially' unlock a lock. This is especially helpful for code with callbacks, where the upper layer assumes a lock remains taken but a lower layer thinks it maybe can drop and reacquire the lock. By unwittingly breaking up the lock, races can be introduced. Lock pinning is a lockdep annotation that helps with this, when you lockdep_pin_lock() a held lock, any unlock without a lockdep_unpin_lock() will produce a WARN. Think of this as a relative of lockdep_assert_held(), except you don't only assert its held now, but ensure it stays held until you release your assertion. RFC: a possible alternative API would be something like: int cookie = lockdep_pin_lock(&foo); ... lockdep_unpin_lock(&foo, cookie); Where we pick a random number for the pin_count; this makes it impossible to sneak a lock break in without also passing the right cookie along. I've not done this because it ends up generating code for !LOCKDEP, esp. if you need to pass the cookie around for some reason. Signed-off-by: Peter Zijlstra (Intel) Cc: ktkhai@parallels.com Cc: rostedt@goodmis.org Cc: juri.lelli@gmail.com Cc: pang.xunlei@linaro.org Cc: oleg@redhat.com Cc: wanpeng.li@linux.intel.com Cc: umgwanakikbuti@gmail.com Link: http://lkml.kernel.org/r/20150611124743.906731065@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/lockdep.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 066ba4157541..c5b6b5830acf 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -255,6 +255,7 @@ struct held_lock { unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; unsigned int references:12; /* 32 bits */ + unsigned int pin_count; }; /* @@ -354,6 +355,9 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_trace_alloc(gfp_t mask); +extern void lock_pin_lock(struct lockdep_map *lock); +extern void lock_unpin_lock(struct lockdep_map *lock); + # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) @@ -368,6 +372,9 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) +#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) +#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map) + #else /* !CONFIG_LOCKDEP */ static inline void lockdep_off(void) @@ -420,6 +427,9 @@ struct lock_class_key { }; #define lockdep_recursing(tsk) (0) +#define lockdep_pin_lock(l) do { (void)(l); } while (0) +#define lockdep_unpin_lock(l) do { (void)(l); } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT -- cgit v1.2.3 From 7f3b62cf945dc3e57fbd693022a5651206ce85b0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:27:43 +0200 Subject: acpi-video-detect: Remove the unused acpi_video_dmi_demote_vendor() function Remove the now unused acpi_video_dmi_demote_vendor() function, this was never a proper counter part of acpi_video_dmi_promote_vendor() since the calls to acpi_video_dmi_promote_vendor() are not counted. Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..01bffd30c6c7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -248,7 +248,6 @@ extern bool wmi_has_guid(const char *guid); extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle); extern long acpi_is_video_device(acpi_handle handle); extern void acpi_video_dmi_promote_vendor(void); -extern void acpi_video_dmi_demote_vendor(void); extern int acpi_video_backlight_support(void); extern int acpi_video_display_switch_support(void); @@ -268,10 +267,6 @@ static inline void acpi_video_dmi_promote_vendor(void) { } -static inline void acpi_video_dmi_demote_vendor(void) -{ -} - static inline int acpi_video_backlight_support(void) { return 0; -- cgit v1.2.3 From fb105d964226ce4834b45d7e3d9f339aa716ed70 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:27:44 +0200 Subject: acpi-video-detect: Make acpi_video_get_capabilities a private function acpi_video_get_capabilities() is only used inside video_detect.c so make it static. While at it also remove the prototype for the non existent acpi_video_display_switch_support function from acpi.h Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 01bffd30c6c7..88c92a03a77e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -245,19 +245,12 @@ extern bool wmi_has_guid(const char *guid); #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -extern long acpi_video_get_capabilities(acpi_handle graphics_dev_handle); extern long acpi_is_video_device(acpi_handle handle); extern void acpi_video_dmi_promote_vendor(void); extern int acpi_video_backlight_support(void); -extern int acpi_video_display_switch_support(void); #else -static inline long acpi_video_get_capabilities(acpi_handle graphics_dev_handle) -{ - return 0; -} - static inline long acpi_is_video_device(acpi_handle handle) { return 0; @@ -272,11 +265,6 @@ static inline int acpi_video_backlight_support(void) return 0; } -static inline int acpi_video_display_switch_support(void) -{ - return 0; -} - #endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */ extern int acpi_blacklisted(void); -- cgit v1.2.3 From adc8bb8e0fe005ed29366e6c4621652481878214 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:27:45 +0200 Subject: acpi-video-detect: Move acpi_is_video_device() to acpi/scan.c This allows video_detect.c to be build as a module, this is a preparation patch for the backlight interface selection logic cleanup. Note this commit also causes acpi_is_video_device() to always be build indepedent of CONFIG_ACPI_VIDEO, as there is no reason to make its building depend on CONFIG_ACPI_VIDEO. Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88c92a03a77e..7cb3b0bc4a7e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -243,19 +243,15 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 +extern long acpi_is_video_device(acpi_handle handle); + #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -extern long acpi_is_video_device(acpi_handle handle); extern void acpi_video_dmi_promote_vendor(void); extern int acpi_video_backlight_support(void); #else -static inline long acpi_is_video_device(acpi_handle handle) -{ - return 0; -} - static inline void acpi_video_dmi_promote_vendor(void) { } -- cgit v1.2.3 From a87878bafa1f82c20eddaf2d23780b194c35ccf5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:27:46 +0200 Subject: acpi-video-detect: Move acpi_osi_is_win8 to osl.c acpi_osi_is_win8 needs access to acpi_gbl_osi_data which is not exported, so move it to osl.c. Alternatively we could export acpi_gbl_osi_data but that seems undesirable. This allows video_detect.c to be build as a module, besides that acpi_osi_is_win8() is something which does not really belong in video_detect.c in the first place. Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7cb3b0bc4a7e..913a1c19818a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -266,6 +266,7 @@ static inline int acpi_video_backlight_support(void) extern int acpi_blacklisted(void); extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); +extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA int acpi_get_node(acpi_handle handle); -- cgit v1.2.3 From 14ca7a47d0ab2a7a35faab130e6d9682f8ff1a46 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:27:47 +0200 Subject: acpi-video-detect: video: Make video_detect code part of the video module This is a preparation patch for the backlight interface selection logic cleanup, there are 2 reasons to not always build the video_detect code into the kernel: 1) In order for the video_detect.c to also deal with / select native backlight interfaces on win8 systems, instead of doing this in video.c where it does not belong, video_detect.c needs to call into the backlight class code. Which cannot be done if it is builtin and the blacklight class is not. 2) Currently all the platform/x86 drivers which have quirks to prefer the vendor driver over acpi-video call acpi_video_unregister_backlight() to remove the acpi-video backlight interface, this logic really belongs in video_detect.c, which will cause video_detect.c to depend on symbols of video.c and video.c already depends on video_detect.c symbols, so they really need to be a single module. Note that this commits make 2 changes so as to maintain 100% kernel commandline compatibility: 1) The __setup call for the acpi_backlight= handling is moved to acpi/util.c as __setup may only be used by code which is alwasy builtin 2) video.c is renamed to acpi_video.c so that it can be combined with video_detect.c into video.ko This commit also makes changes to drivers/platform/x86/Kconfig to ensure that drivers which use acpi_video_backlight_support() from video_detect.c, will not be built-in when acpi_video is not built in. This also changes some "select" uses to "depends on" to avoid dependency loops. Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 913a1c19818a..f097c0a2718e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -243,6 +243,7 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 +extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -- cgit v1.2.3 From d0a530ba424ec1be7630f7fce2db9860b9429b8f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 16 Jun 2015 16:28:12 +0200 Subject: acpi-video-detect: Remove old API Remove the old backlight interface selection API now that all drivers have been ported to the new API. Signed-off-by: Hans de Goede Acked-by: Darren Hart Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f097c0a2718e..5966d1d9280e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -245,25 +245,6 @@ extern bool wmi_has_guid(const char *guid); extern char acpi_video_backlight_string[]; extern long acpi_is_video_device(acpi_handle handle); - -#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) - -extern void acpi_video_dmi_promote_vendor(void); -extern int acpi_video_backlight_support(void); - -#else - -static inline void acpi_video_dmi_promote_vendor(void) -{ -} - -static inline int acpi_video_backlight_support(void) -{ - return 0; -} - -#endif /* defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) */ - extern int acpi_blacklisted(void); extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); -- cgit v1.2.3 From edf18b9108f5025f9e83b2c167c9122954acbc62 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Jun 2015 14:00:48 +0800 Subject: crypto: api - Add CRYPTO_MINALIGN_ATTR to struct crypto_alg The struct crypto_alg is embedded into various type-specific structs such as aead_alg. This is then used as part of instances such as struct aead_instance. It is also embedded into the generic struct crypto_instance. In order to ensure that struct aead_instance can be converted to struct crypto_instance when necessary, we need to ensure that crypto_alg is aligned properly. This patch adds an alignment attribute to struct crypto_alg to ensure this. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 0e3f71a73e3b..964e5735a6a9 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -513,7 +513,7 @@ struct crypto_alg { void (*cra_destroy)(struct crypto_alg *alg); struct module *cra_module; -}; +} CRYPTO_MINALIGN_ATTR; /* * Algorithm registration interface. -- cgit v1.2.3 From 4bacc9c9234c7c8eec44f5ed4e960d9f96fa0f01 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:31 +0100 Subject: overlayfs: Make f_path always point to the overlay and f_inode to the underlay Make file->f_path always point to the overlay dentry so that the path in /proc/pid/fd is correct and to ensure that label-based LSMs have access to the overlay as well as the underlay (path-based LSMs probably don't need it). Using my union testsuite to set things up, before the patch I see: [root@andromeda union-testsuite]# bash 5 /a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 13381 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 13381 Links: 1 ... After the patch: [root@andromeda union-testsuite]# bash 5 /mnt/a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 40346 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 40346 Links: 1 ... Note the change in where /proc/$$/fd/5 points to in the ls command. It was pointing to /a/foo107 (which doesn't exist) and now points to /mnt/a/foo107 (which is correct). The inode accessed, however, is the lower layer. The union layer is on device 25h/37d and the upper layer on 24h/36d. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 2 ++ include/linux/fs.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d..167ec0934049 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; diff --git a/include/linux/fs.h b/include/linux/fs.h index b577e801b4af..2bd77e10e8e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,7 +1641,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! */ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2194,7 +2193,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3 From 68722101ec3a0e179408a13708dd020e04f54aab Mon Sep 17 00:00:00 2001 From: George Beshers Date: Thu, 18 Jun 2015 10:25:13 -0500 Subject: locking/lockdep: Remove hard coded array size dependency An apparent oversight left a hardcoded '4' in place when LOCKSTAT_POINTS was introduced. The contention_point[] and contending_point[] arrays in the structs lock_class and lock_class_stats need to be the same size for the loops in lock_stats() to be correct. This patch allows LOCKSTAT_POINTS to be changed without affecting the correctness of the code. Signed-off-by: George Beshers Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 066ba4157541..2722111591a3 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -130,8 +130,8 @@ enum bounce_type { }; struct lock_class_stats { - unsigned long contention_point[4]; - unsigned long contending_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; -- cgit v1.2.3 From b17718d02f54b90978d0e0146368b512b11c3e84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Jun 2015 17:30:23 +0200 Subject: sched/stop_machine: Fix deadlock between multiple stop_two_cpus() Jiri reported a machine stuck in multi_cpu_stop() with migrate_swap_stop() as function and with the following src,dst cpu pairs: {11, 4} {13, 11} { 4, 13} 4 11 13 cpuM: queue(4 ,13) *Ma cpuN: queue(13,11) *N Na *M Mb cpuO: queue(11, 4) *O Oa *Nb *Ob Where *X denotes the cpu running the queueing of cpu-X and X[ab] denotes the first/second queued work. You'll observe the top of the workqueue for each cpu: 4,11,13 to be work from cpus: M, O, N resp. IOW. deadlock. Do away with the queueing trickery and introduce lg_double_lock() to lock both CPUs and fully serialize the stop_two_cpus() callers instead of the partial (and buggy) serialization we have now. Reported-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150605153023.GH19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/lglock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0081f000e34b..c92ebd100d9b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -52,10 +52,15 @@ struct lglock { static struct lglock name = { .lock = &name ## _lock } void lg_lock_init(struct lglock *lg, char *name); + void lg_local_lock(struct lglock *lg); void lg_local_unlock(struct lglock *lg); void lg_local_lock_cpu(struct lglock *lg, int cpu); void lg_local_unlock_cpu(struct lglock *lg, int cpu); + +void lg_double_lock(struct lglock *lg, int cpu1, int cpu2); +void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); + void lg_global_lock(struct lglock *lg); void lg_global_unlock(struct lglock *lg); -- cgit v1.2.3 From 1dabbcec2c0a36fe43509d06499b9e512e70a028 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:28 +0000 Subject: timer: Use hlist for the timer wheel hash buckets This reduces the size of struct tvec_base by 50% and results in slightly smaller code as well. Before: struct tvec_base: size: 8256, cachelines: 129 text data bss dec hex filename 17698 13297 8256 39251 9953 ../build/kernel/time/timer.o After: struct tvec_base: 4160, cachelines: 65 text data bss dec hex filename 17491 9201 4160 30852 7884 ../build/kernel/time/timer.o Signed-off-by: Thomas Gleixner Reviewed-by: Viresh Kumar Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224511.854731214@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index fbb80e0030bf..064ee24d3f38 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -14,7 +14,7 @@ struct timer_list { * All fields that change during normal runtime grouped to the * same cacheline */ - struct list_head entry; + struct hlist_node entry; unsigned long expires; struct tvec_base *base; @@ -71,7 +71,7 @@ extern struct tvec_base boot_tvec_bases; #define TIMER_FLAG_MASK 0x3LU #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ - .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ @@ -168,7 +168,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, */ static inline int timer_pending(const struct timer_list * timer) { - return timer->entry.next != NULL; + return timer->entry.pprev != NULL; } extern void add_timer_on(struct timer_list *timer, int cpu); -- cgit v1.2.3 From 0eeda71bc30d74f66f8231f45621d5ace3419186 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:29 +0000 Subject: timer: Replace timer base by a cpu index Instead of storing a pointer to the per cpu tvec_base we can simply cache a CPU index in the timer_list and use that to get hold of the correct per cpu tvec_base. This is only used in lock_timer_base() and the slightly larger code is peanuts versus the spinlock operation and the d-cache foot print of the timer wheel. Aside of that this allows to get rid of following nuisances: - boot_tvec_base That statically allocated 4k bss data is just kept around so the timer has a home when it gets statically initialized. It serves no other purpose. With the CPU index we assign the timer to CPU0 at static initialization time and therefor can avoid the whole boot_tvec_base dance. That also simplifies the init code, which just can use the per cpu base. Before: text data bss dec hex filename 17491 9201 4160 30852 7884 ../build/kernel/time/timer.o After: text data bss dec hex filename 17440 9193 0 26633 6809 ../build/kernel/time/timer.o - Overloading the base pointer with various flags The CPU index has enough space to hold the flags (deferrable, irqsafe) so we can get rid of the extra masking and bit fiddling with the base pointer. As a benefit we reduce the size of struct timer_list on 64 bit machines. 4 - 8 bytes, a size reduction up to 15% per struct timer_list, which is a real win as we have tons of them embedded in other structs. This changes also the newly added deferrable printout of the timer start trace point to capture and print all timer->flags, which allows us to decode the target cpu of the timer as well. We might have used bitfields for this, but that would change the static initializers and the init function for no value to accomodate big endian bitfields. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Cc: Steven Rostedt Cc: Badhri Jagan Sridharan Link: http://lkml.kernel.org/r/20150526224511.950084301@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 064ee24d3f38..4a0d52bc2073 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -14,27 +14,23 @@ struct timer_list { * All fields that change during normal runtime grouped to the * same cacheline */ - struct hlist_node entry; - unsigned long expires; - struct tvec_base *base; - - void (*function)(unsigned long); - unsigned long data; - - int slack; + struct hlist_node entry; + unsigned long expires; + void (*function)(unsigned long); + unsigned long data; + u32 flags; + int slack; #ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; + int start_pid; + void *start_site; + char start_comm[16]; #endif #ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; + struct lockdep_map lockdep_map; #endif }; -extern struct tvec_base boot_tvec_bases; - #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting the lockdep_map key @@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases; #endif /* - * Note that all tvec_bases are at least 4 byte aligned and lower two bits - * of base in timer_list is guaranteed to be zero. Use them for flags. - * * A deferrable timer will work normally when the system is busy, but * will not cause a CPU to come out of idle just to service it; instead, * the timer will be serviced when the CPU eventually wakes up with a @@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases; * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! */ -#define TIMER_DEFERRABLE 0x1LU -#define TIMER_IRQSAFE 0x2LU - -#define TIMER_FLAG_MASK 0x3LU +#define TIMER_CPUMASK 0x0007FFFF +#define TIMER_MIGRATING 0x00080000 +#define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) +#define TIMER_DEFERRABLE 0x00100000 +#define TIMER_IRQSAFE 0x00200000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ + .flags = (_flags), \ .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ -- cgit v1.2.3 From c74441a17eb975b604e339ca6c11b9ab9aaca11f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:31 +0000 Subject: timer: Stats: Simplify the flags handling Simplify the handling of the flag storage for the timer statistics. No intermediate storage anymore. Just hand over the flags field. I left the printout of 'deferrable' for now because changing this would be an ABI update and I have no idea how strong people feel about that. OTOH, I wonder whether we should kill the whole timer stats stuff because all of that information can be retrieved via ftrace/perf as well. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.046626248@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 4a0d52bc2073..ff0689b6e297 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -188,13 +188,10 @@ extern void set_timer_slack(struct timer_list *time, int slack_hz); extern int timer_stats_active; -#define TIMER_STATS_FLAG_DEFERRABLE 0x1 - extern void init_timer_stats(void); extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, - unsigned int timer_flag); + void *timerf, char *comm, u32 flags); extern void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr); -- cgit v1.2.3 From bc7a34b8b9ebfb0f4b8a35a72a0b134fd6c5ef50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:33 +0000 Subject: timer: Reduce timer migration overhead if disabled Eric reported that the timer_migration sysctl is not really nice performance wise as it needs to check at every timer insertion whether the feature is enabled or not. Further the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled. We can do better and store that information in the per cpu (hr)timer bases. I pondered to use a static key, but that's a nightmare to update from the nohz code and the timer base cache line is hot anyway when we select a timer base. The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set even if nohz was disabled on the kernel command line. With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ migration is enabled, if the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what. Before: 47.76% hog [.] main 14.84% [kernel] [k] _raw_spin_lock_irqsave 9.55% [kernel] [k] _raw_spin_unlock_irqrestore 6.71% [kernel] [k] mod_timer 6.24% [kernel] [k] lock_timer_base.isra.38 3.76% [kernel] [k] detach_if_pending 3.71% [kernel] [k] del_timer 2.50% [kernel] [k] internal_add_timer 1.51% [kernel] [k] get_nohz_timer_target 1.28% [kernel] [k] __internal_add_timer 0.78% [kernel] [k] timerfn 0.48% [kernel] [k] wake_up_nohz_cpu After: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu Reported-by: Eric Dumazet Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ include/linux/sched.h | 6 +----- include/linux/sched/sysctl.h | 12 ------------ include/linux/timer.h | 9 +++++++++ 4 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5db055821ef3..69551020bb97 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -163,6 +163,7 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events + * @migration_enabled: The migration of hrtimers to other cpus is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -186,6 +187,7 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; + bool migration_enabled; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..d7151460b0cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -335,14 +335,10 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(int pinned); +extern int get_nohz_timer_target(void); #else static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } -static inline int get_nohz_timer_target(int pinned) -{ - return smp_processor_id(); -} #endif /* diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 596a0e007c62..c9e4731cf10b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif /* * control realtime throttling: diff --git a/include/linux/timer.h b/include/linux/timer.h index ff0689b6e297..61aa61dc410c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -238,6 +238,15 @@ extern void run_local_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#include + +extern unsigned int sysctl_timer_migration; +int timer_migration_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); -- cgit v1.2.3 From 683be13a284720205228e29207ef11a1c3c322b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 May 2015 22:50:35 +0000 Subject: timer: Minimize nohz off overhead If nohz is disabled on the kernel command line the [hr]timer code still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty pointless exercise. Cache nohz_active in [hr]timer per cpu bases and avoid the overhead. Before: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu After: 48.73% hog [.] main 15.36% [kernel] [k] _raw_spin_lock_irqsave 9.77% [kernel] [k] _raw_spin_unlock_irqrestore 6.61% [kernel] [k] lock_timer_base.isra.38 6.42% [kernel] [k] mod_timer 3.90% [kernel] [k] detach_if_pending 3.76% [kernel] [k] del_timer 2.41% [kernel] [k] internal_add_timer 1.39% [kernel] [k] __internal_add_timer 0.76% [kernel] [k] timerfn We probably should have a cached value for nohz full in the per cpu bases as well to avoid the cpumask check. The base cache line is hot already, the cpumask not necessarily. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul McKenney Cc: Frederic Weisbecker Cc: Eric Dumazet Cc: Viresh Kumar Cc: John Stultz Cc: Joonwoo Park Cc: Wenbo Wang Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 69551020bb97..76dd4f0da5ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,6 +164,7 @@ enum hrtimer_base_type { * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events * @migration_enabled: The migration of hrtimers to other cpus is enabled + * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -188,6 +189,7 @@ struct hrtimer_cpu_base { unsigned int active_bases; unsigned int clock_was_set_seq; bool migration_enabled; + bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, -- cgit v1.2.3 From 1796dcce2daacc125f2d60afc3f631ca29e36684 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 3 May 2015 18:57:10 +0900 Subject: rtc: interface: Fix coding style violations Fix issues reported by checkpatch: ERROR: open brace '{' following struct go on the same line ERROR: "foo* bar" should be "foo *bar" Additionally adjust alignment of wrapped function arguments. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 8dcf6825fa88..b0709f80dfb0 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -101,8 +101,7 @@ struct rtc_timer { /* flags */ #define RTC_DEV_BUSY 0 -struct rtc_device -{ +struct rtc_device { struct device dev; struct module *owner; @@ -198,10 +197,10 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); -void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data); -int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer, - ktime_t expires, ktime_t period); -int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer* timer); +void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data); +int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, + ktime_t expires, ktime_t period); +int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); void rtc_timer_do_work(struct work_struct *work); static inline bool is_leap_year(unsigned int year) -- cgit v1.2.3 From 73744a64aab872703b851b9678a7f488b507eb81 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 3 May 2015 18:57:11 +0900 Subject: rtc: interface: Remove unused return value from rtc_timer_cancel() The rtc_timer_cancel() always returns 0 and cannot fail (calls only other void-returning functions). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b0709f80dfb0..587017e7939c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -200,7 +200,7 @@ int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data); int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period); -int rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); +void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer); void rtc_timer_do_work(struct work_struct *work); static inline bool is_leap_year(unsigned int year) -- cgit v1.2.3 From d8d919879e9a645061a560a0a26abb9f3bca97df Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 3 Apr 2015 18:43:44 +0200 Subject: clk: add CLK_RECALC_NEW_RATES clock flag for Exynos cpu clock support This flag is needed to fix the issue with wrong dividers being setup by Common Clock Framework when using the new Exynos cpu clock support. The issue happens because clk_core_set_rate_nolock() calls clk_calc_new_rates(clk, rate) before both pre/post clock notifiers have a chance to run. In case of Exynos cpu clock support pre/post clock notifiers are registered for mout_apll clock which is a parent of armclk cpu clock and dividers are modified in both pre and post clock notifier. This results in wrong dividers values being later programmed by clk_change_rate(top). To workaround the problem CLK_RECALC_NEW_RATES flag is added and it is set for mout_apll clock later so the correct divider values are re-calculated after both pre and post clock notifiers had run. For example when using "performance" governor on Exynos4210 Origen board the cpufreq-dt driver requests to change the frequency from 1000MHz to 1200MHz and after the change state of the relevant clocks is following: Without use of CLK_GET_RATE_NOCACHE flag: fout_apll rate: 1200000000 fout_apll_div_2 rate: 600000000 mout_clkout_cpu rate: 600000000 div_clkout_cpu rate: 600000000 clkout_cpu rate: 600000000 mout_apll rate: 1200000000 armclk rate: 1200000000 mout_hpm rate: 1200000000 div_copy rate: 300000000 div_hpm rate: 300000000 mout_core rate: 1200000000 div_core rate: 1200000000 div_core2 rate: 1200000000 arm_clk_div_2 rate: 600000000 div_corem0 rate: 300000000 div_corem1 rate: 150000000 div_periph rate: 300000000 div_atb rate: 300000000 div_pclk_dbg rate: 150000000 sclk_apll rate: 1200000000 sclk_apll_div_2 rate: 600000000 With use of CLK_GET_RATE_NOCACHE flag: fout_apll rate: 1200000000 fout_apll_div_2 rate: 600000000 mout_clkout_cpu rate: 600000000 div_clkout_cpu rate: 600000000 clkout_cpu rate: 600000000 mout_apll rate: 1200000000 armclk rate: 1200000000 mout_hpm rate: 1200000000 div_copy rate: 200000000 div_hpm rate: 200000000 mout_core rate: 1200000000 div_core rate: 1200000000 div_core2 rate: 1200000000 arm_clk_div_2 rate: 600000000 div_corem0 rate: 300000000 div_corem1 rate: 150000000 div_periph rate: 300000000 div_atb rate: 240000000 div_pclk_dbg rate: 120000000 sclk_apll rate: 150000000 sclk_apll_div_2 rate: 75000000 Without this change cpufreq-dt driver showed ~10 mA larger energy consumption when compared to cpufreq-exynos one when "performance" cpufreq governor was used on Exynos4210 SoC based Origen board. This issue was probably meant to be workarounded by use of CLK_GET_RATE_NOCACHE and CLK_DIVIDER_READ_ONLY clock flags in the original Exynos cpu clock patchset (in "[PATCH v12 6/6] clk: samsung: remove unused clock aliases and update clock flags" patch) but usage of these flags is not sufficient to fix the issue observed. Cc: Thomas Abraham Cc: Tomasz Figa Cc: Mike Turquette Cc: Javier Martinez Canillas Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Michael Turquette --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index df695313f975..82f59ca8188a 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -31,6 +31,7 @@ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ #define CLK_GET_ACCURACY_NOCACHE BIT(8) /* do not use the cached clk accuracy */ +#define CLK_RECALC_NEW_RATES BIT(9) /* recalc rates after notifications */ struct clk_hw; struct clk_core; -- cgit v1.2.3 From 1387fe7d292b66677dae31d25a8e3c953bf21748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 12 May 2015 11:31:02 +0200 Subject: MIPS: BCM47xx: Extract all boardflags to new u32 fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For years we planned to get rid of old u16 fields, let's start doing it with MIPS code. This process will take some time, it requires doing the same in ssb/bcma and then switching all drivers to new fields. This will be handled in separated patches submitted to appropriate trees. Signed-off-by: Rafał Miłecki Cc: linux-mips@linux-mips.org Cc: Hauke Mehrtens Patchwork: https://patchwork.linux-mips.org/patch/10026/ Signed-off-by: Ralf Baechle --- include/linux/ssb/ssb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4568a5cc9ab8..ee90e32a0607 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -88,11 +88,14 @@ struct ssb_sprom { u32 ofdm5glpo; /* 5.2GHz OFDM power offset */ u32 ofdm5gpo; /* 5.3GHz OFDM power offset */ u32 ofdm5ghpo; /* 5.8GHz OFDM power offset */ + u32 boardflags; + u32 boardflags2; + u32 boardflags3; + /* TODO: Switch all drivers to new u32 fields and drop below ones */ u16 boardflags_lo; /* Board flags (bits 0-15) */ u16 boardflags_hi; /* Board flags (bits 16-31) */ u16 boardflags2_lo; /* Board flags (bits 32-47) */ u16 boardflags2_hi; /* Board flags (bits 48-63) */ - /* TODO store board flags in a single u64 */ struct ssb_sprom_core_pwr_info core_pwr_info[4]; -- cgit v1.2.3 From 6e122ac0053d071976686dd04cdd60ea8039bb7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 12 May 2015 11:54:48 +0200 Subject: MIPS: BCM47xx: Extract info about et2 interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New devices may have more than 1 Ethernet core (device). We should extract info about them to make it available to Ethernet drivers. Signed-off-by: Rafał Miłecki Cc: linux-mips@linux-mips.org Cc: Hauke Mehrtens Cc: Hante Meuleman Cc: Ian Kent Patchwork: https://patchwork.linux-mips.org/patch/10027/ Signed-off-by: Ralf Baechle --- include/linux/ssb/ssb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index ee90e32a0607..c3d1a525bacc 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -29,10 +29,13 @@ struct ssb_sprom { u8 il0mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11b/g */ u8 et0mac[6] __aligned(sizeof(u16)); /* MAC address for Ethernet */ u8 et1mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11a */ + u8 et2mac[6] __aligned(sizeof(u16)); /* MAC address for extra Ethernet */ u8 et0phyaddr; /* MII address for enet0 */ u8 et1phyaddr; /* MII address for enet1 */ + u8 et2phyaddr; /* MII address for enet2 */ u8 et0mdcport; /* MDIO for enet0 */ u8 et1mdcport; /* MDIO for enet1 */ + u8 et2mdcport; /* MDIO for enet2 */ u16 dev_id; /* Device ID overriding e.g. PCI ID */ u16 board_rev; /* Board revision number from SPROM. */ u16 board_num; /* Board number from SPROM. */ -- cgit v1.2.3 From 44e08e7099c8de226606cfc989b45d6fa27f507f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sun, 24 May 2015 16:11:31 +0100 Subject: MIPS/IRQCHIP: Move Ingenic SoC intc driver to drivers/irqchip Move the driver for Ingenic SoC interrupt controllers into drivers/irqchip where it belongs. Signed-off-by: Paul Burton Cc: Lars-Peter Clausen Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: Brian Norris Patchwork: https://patchwork.linux-mips.org/patch/10147/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/ingenic.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/irqchip/ingenic.h (limited to 'include/linux') diff --git a/include/linux/irqchip/ingenic.h b/include/linux/irqchip/ingenic.h new file mode 100644 index 000000000000..0ee319a4029d --- /dev/null +++ b/include/linux/irqchip/ingenic.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2010, Lars-Peter Clausen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LINUX_IRQCHIP_INGENIC_H__ +#define __LINUX_IRQCHIP_INGENIC_H__ + +#include + +extern void ingenic_intc_irq_suspend(struct irq_data *data); +extern void ingenic_intc_irq_resume(struct irq_data *data); + +#endif -- cgit v1.2.3 From 55cab93bcf1422ab4298edc65c349c4304b4884e Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Thu, 21 May 2015 15:27:23 +0200 Subject: mips: bcm47xx: allow retrieval of complete nvram contents Host platforms such as routers supported by OpenWrt can support NVRAM reading directly from internal NVRAM store. The brcmfmac for one requires the complete nvram contents to select what needs to be sent to wireless device. Signed-off-by: Arend van Spriel Signed-off-by: Hante Meuleman Reviewed-by: Arend Van Spriel Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Daniel (Deognyoun) Kim Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10093/ Signed-off-by: Ralf Baechle --- include/linux/bcm47xx_nvram.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index b12b07e75929..c73927c66c2c 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -10,11 +10,17 @@ #include #include +#include #ifdef CONFIG_BCM47XX int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); +char *bcm47xx_nvram_get_contents(size_t *val_len); +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ + vfree(nvram); +}; #else static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) { @@ -29,6 +35,15 @@ static inline int bcm47xx_nvram_gpio_pin(const char *name) { return -ENOTSUPP; }; + +static inline char *bcm47xx_nvram_get_contents(size_t *val_len) +{ + return NULL; +}; + +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ +}; #endif #endif /* __BCM47XX_NVRAM_H */ -- cgit v1.2.3 From 2ddf3a792218cddd30140b1f8b32cb6e2d67921f Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Sun, 31 May 2015 02:18:24 +0200 Subject: MIPS: ath79: Add OF support to the GPIO driver Replace the simple GPIO chip registration by a platform driver and make ath79_gpio_init() just register the device. Signed-off-by: Alban Bedel Cc: linux-mips@linux-mips.org Signed-off-by: Ralf Baechle --- include/linux/platform_data/gpio-ath79.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/platform_data/gpio-ath79.h (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h new file mode 100644 index 000000000000..88b0db7bee74 --- /dev/null +++ b/include/linux/platform_data/gpio-ath79.h @@ -0,0 +1,19 @@ +/* + * Atheros AR7XXX/AR9XXX GPIO controller platform data + * + * Copyright (C) 2015 Alban Bedel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H +#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H + +struct ath79_gpio_platform_data { + unsigned ngpios; + bool oe_inverted; +}; + +#endif -- cgit v1.2.3 From f6e734a8c162297953d7bfc0f3f6bf4f8c33d72f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 10 Jun 2015 23:05:08 +0200 Subject: MIPS: BCM47xx: Move NVRAM driver to the drivers/firmware/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After Broadcom switched from MIPS to ARM for their home routers we need to have NVRAM driver in some common place (not arch/mips/). As explained in Kconfig, this driver is responsible for parsing SoC configuration data that is passed to the kernel in flash from the bootloader firmware called "CFE". We were thinking about putting it in bus directory, however there are two possible buses for MIPS: drivers/ssb/ and drivers/bcma/. So this won't fit there and this is why I would like to move this driver to the drivers/firmware/. Signed-off-by: Rafał Miłecki Reviewed-by: Paul Walmsley Cc: linux-mips@linux-mips.org Cc: Hauke Mehrtens Cc: Seiji Aguchi Cc: Greg Kroah-Hartman Cc: Ard Biesheuvel Cc: Mike Waychison Cc: Roy Franz Cc: Matt Fleming Cc: Linus Torvalds Patchwork: https://patchwork.linux-mips.org/patch/10544/ Signed-off-by: Ralf Baechle --- include/linux/bcm47xx_nvram.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index c73927c66c2c..2793652fbf66 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -12,7 +12,7 @@ #include #include -#ifdef CONFIG_BCM47XX +#ifdef CONFIG_BCM47XX_NVRAM int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); -- cgit v1.2.3 From d0497524658e37956737d7dbee73cc42120255dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Jun 2015 19:11:44 +0800 Subject: crypto: user - Move cryptouser.h to uapi The header file cryptouser.h only contains information that is exported to user-space. Signed-off-by: Herbert Xu --- include/linux/cryptouser.h | 110 --------------------------------------------- 1 file changed, 110 deletions(-) delete mode 100644 include/linux/cryptouser.h (limited to 'include/linux') diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h deleted file mode 100644 index 36efbbbf2f83..000000000000 --- a/include/linux/cryptouser.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Crypto user configuration API. - * - * Copyright (C) 2011 secunet Security Networks AG - * Copyright (C) 2011 Steffen Klassert - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -/* Netlink configuration messages. */ -enum { - CRYPTO_MSG_BASE = 0x10, - CRYPTO_MSG_NEWALG = 0x10, - CRYPTO_MSG_DELALG, - CRYPTO_MSG_UPDATEALG, - CRYPTO_MSG_GETALG, - __CRYPTO_MSG_MAX -}; -#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) -#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) - -#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME - -/* Netlink message attributes. */ -enum crypto_attr_type_t { - CRYPTOCFGA_UNSPEC, - CRYPTOCFGA_PRIORITY_VAL, /* __u32 */ - CRYPTOCFGA_REPORT_LARVAL, /* struct crypto_report_larval */ - CRYPTOCFGA_REPORT_HASH, /* struct crypto_report_hash */ - CRYPTOCFGA_REPORT_BLKCIPHER, /* struct crypto_report_blkcipher */ - CRYPTOCFGA_REPORT_AEAD, /* struct crypto_report_aead */ - CRYPTOCFGA_REPORT_COMPRESS, /* struct crypto_report_comp */ - CRYPTOCFGA_REPORT_RNG, /* struct crypto_report_rng */ - CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ - CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ - __CRYPTOCFGA_MAX - -#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) -}; - -struct crypto_user_alg { - char cru_name[CRYPTO_MAX_ALG_NAME]; - char cru_driver_name[CRYPTO_MAX_ALG_NAME]; - char cru_module_name[CRYPTO_MAX_ALG_NAME]; - __u32 cru_type; - __u32 cru_mask; - __u32 cru_refcnt; - __u32 cru_flags; -}; - -struct crypto_report_larval { - char type[CRYPTO_MAX_NAME]; -}; - -struct crypto_report_hash { - char type[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int digestsize; -}; - -struct crypto_report_cipher { - char type[CRYPTO_MAX_ALG_NAME]; - unsigned int blocksize; - unsigned int min_keysize; - unsigned int max_keysize; -}; - -struct crypto_report_blkcipher { - char type[CRYPTO_MAX_NAME]; - char geniv[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; -}; - -struct crypto_report_aead { - char type[CRYPTO_MAX_NAME]; - char geniv[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int maxauthsize; - unsigned int ivsize; -}; - -struct crypto_report_comp { - char type[CRYPTO_MAX_NAME]; -}; - -struct crypto_report_rng { - char type[CRYPTO_MAX_NAME]; - unsigned int seedsize; -}; - -struct crypto_report_akcipher { - char type[CRYPTO_MAX_NAME]; -}; - -#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ - sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 3e90950d36f19e5477becd5acb02e9b8d8c8956f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Jun 2015 10:31:40 +0800 Subject: crypto: algif_aead - Temporarily disable all AEAD algorithms As the AEAD conversion is still ongoing, we do not yet wish to export legacy AEAD implementations to user-space, as their calling convention will change. This patch actually disables all AEAD algorithms because some of them (e.g., cryptd) will need to be modified to propagate this flag. Subsequent patches will reenable them on an individual basis. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 964e5735a6a9..81ef938b0a8e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -101,6 +101,12 @@ */ #define CRYPTO_ALG_INTERNAL 0x00002000 +/* + * Temporary flag used to prevent legacy AEAD implementations from + * being used by user-space. + */ +#define CRYPTO_ALG_AEAD_NEW 0x00004000 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From 8ccd0d0ca04147e91890c373677f1e741dda2631 Mon Sep 17 00:00:00 2001 From: Hyungwon Hwang Date: Fri, 12 Jun 2015 21:59:01 +0900 Subject: of: add helper for getting endpoint node of specific identifiers When there are multiple ports or multiple endpoints in a port, they have to be distinguished by the value of reg property. It is common. The drivers can get the specific endpoint in the specific port via this function. Now the drivers have to implement this code in themselves or have to force the order of dt nodes to get the right node. Signed-off-by: Hyungwon Hwang Acked-by: Rob Herring Signed-off-by: Inki Dae --- include/linux/of_graph.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 7bc92e050608..1c1d5b901a72 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -45,6 +45,8 @@ int of_graph_parse_endpoint(const struct device_node *node, struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); +struct device_node *of_graph_get_endpoint_by_regs( + const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_port_parent( const struct device_node *node); struct device_node *of_graph_get_remote_port(const struct device_node *node); @@ -69,6 +71,12 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } +struct device_node *of_graph_get_endpoint_by_regs( + const struct device_node *parent, int port_reg, int reg) +{ + return NULL; +} + static inline struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { -- cgit v1.2.3 From 7ce7b26f84cfcbcb04f526f56f685a56ccddf355 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 27 Apr 2015 21:54:13 +0900 Subject: mfd: Constify regmap and irq configuration data Constify in various drivers configuration data which is not modified: - regmap_irq_chip, - individual regmap_irq's in array, - regmap_config, - irq_domain_ops, Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/da9055/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h index 956afa445998..5dc743fd63a6 100644 --- a/include/linux/mfd/da9055/core.h +++ b/include/linux/mfd/da9055/core.h @@ -89,6 +89,6 @@ static inline int da9055_reg_update(struct da9055 *da9055, unsigned char reg, int da9055_device_init(struct da9055 *da9055); void da9055_device_exit(struct da9055 *da9055); -extern struct regmap_config da9055_regmap_config; +extern const struct regmap_config da9055_regmap_config; #endif /* __DA9055_CORE_H */ -- cgit v1.2.3 From 5c188d748216f67c928d67a42f14b5569b6404a5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 27 Apr 2015 10:18:14 -0700 Subject: mfd: twl4030-power: Fix pmic for boards that need AC charger disabled I noticed the PMIC configuration on 37xx-evm won't actually shut down the voltages during off-idle. Turns out 37xx-evm needs the AC charger state transitions disabled like we are doing for SDP and LDP in the legacy booting case. Let's fix this for device tree based booting by setting up the quirk flag based on the compatible flag. And let's also use the existing define for STARTON_CHG. Note that SDP and EVM do not have the PMIC clken wired to gate the the oscillator while LDP has. Signed-off-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/i2c/twl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 0bc03f100d04..9ad7828d9d34 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -675,6 +675,7 @@ struct twl4030_power_data { struct twl4030_resconfig *board_config; #define TWL4030_RESCONFIG_UNDEF ((u8)-1) bool use_poweroff; /* Board is wired for TWL poweroff */ + bool ac_charger_quirk; /* Disable AC charger on board */ }; extern int twl4030_remove_script(u8 flags); -- cgit v1.2.3 From 8be4efad81d814b607cbdad47176f426be83ba75 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 6 May 2015 19:07:16 +0200 Subject: mfd: max77686: Remove unused struct max77686_opmode_data The defined struct max77686_opmode_data isn't used neither by the max77686 mfd driver nor the drivers for its sub-devices. Signed-off-by: Javier Martinez Canillas Reviewed-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max77686.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index bb995ab9a575..d4b72d519115 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -125,9 +125,4 @@ enum max77686_opmode { MAX77686_OPMODE_STANDBY, }; -struct max77686_opmode_data { - int id; - int mode; -}; - #endif /* __LINUX_MFD_MAX77686_H */ -- cgit v1.2.3 From e6cb73410a6db70eab266f15b7e25053a45b842d Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 11 May 2015 13:58:09 +0100 Subject: mfd: arizona: Add better support for system suspend Allow the chip to completely power off if we enter runtime suspend and there is no jack detection active. This is helpful for systems where system suspend might remove the supplies to the CODEC, without informing us. Note the powering off is done in runtime suspend rather than system suspend, because we need to hold reset until the first time DCVDD is powered anyway (which would be in runtime resume), and we might as well save the extra power. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 16a498f48169..847bdaf47f1d 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -117,6 +117,7 @@ struct arizona { int num_core_supplies; struct regulator_bulk_data core_supplies[ARIZONA_MAX_CORE_SUPPLIES]; struct regulator *dcvdd; + bool has_fully_powered_off; struct arizona_pdata pdata; -- cgit v1.2.3 From fc027d138b79537e5353f3d3bad2bcaac787cd17 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 1 May 2015 16:15:12 +0100 Subject: mfd: arizona: Split INx_MODE into two fields Later arizona silicon has the single/differential selector in a different register, and IN1_MODE only selects between analogue or digital. Prepare for this by splitting the INx_MODE definition into two fields. Signed-off-by: Richard Fitzgerald Signed-off-by: Lee Jones --- include/linux/mfd/arizona/pdata.h | 5 ++++- include/linux/mfd/arizona/registers.h | 27 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 1789cb0f4f17..f6722677e6d0 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -156,7 +156,10 @@ struct arizona_pdata { /** MICBIAS configurations */ struct arizona_micbias micbias[ARIZONA_MAX_MICBIAS]; - /** Mode of input structures */ + /** + * Mode of input structures + * One of the ARIZONA_INMODE_xxx values + */ int inmode[ARIZONA_MAX_INPUT]; /** Mode for outputs */ diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index aacc10d7789c..3499d36e6067 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -2515,9 +2515,12 @@ #define ARIZONA_IN1_DMIC_SUP_MASK 0x1800 /* IN1_DMIC_SUP - [12:11] */ #define ARIZONA_IN1_DMIC_SUP_SHIFT 11 /* IN1_DMIC_SUP - [12:11] */ #define ARIZONA_IN1_DMIC_SUP_WIDTH 2 /* IN1_DMIC_SUP - [12:11] */ -#define ARIZONA_IN1_MODE_MASK 0x0600 /* IN1_MODE - [10:9] */ -#define ARIZONA_IN1_MODE_SHIFT 9 /* IN1_MODE - [10:9] */ -#define ARIZONA_IN1_MODE_WIDTH 2 /* IN1_MODE - [10:9] */ +#define ARIZONA_IN1_MODE_MASK 0x0400 /* IN1_MODE - [10] */ +#define ARIZONA_IN1_MODE_SHIFT 10 /* IN1_MODE - [10] */ +#define ARIZONA_IN1_MODE_WIDTH 1 /* IN1_MODE - [10] */ +#define ARIZONA_IN1_SINGLE_ENDED_MASK 0x0200 /* IN1_MODE - [9] */ +#define ARIZONA_IN1_SINGLE_ENDED_SHIFT 9 /* IN1_MODE - [9] */ +#define ARIZONA_IN1_SINGLE_ENDED_WIDTH 1 /* IN1_MODE - [9] */ #define ARIZONA_IN1L_PGA_VOL_MASK 0x00FE /* IN1L_PGA_VOL - [7:1] */ #define ARIZONA_IN1L_PGA_VOL_SHIFT 1 /* IN1L_PGA_VOL - [7:1] */ #define ARIZONA_IN1L_PGA_VOL_WIDTH 7 /* IN1L_PGA_VOL - [7:1] */ @@ -2588,9 +2591,12 @@ #define ARIZONA_IN2_DMIC_SUP_MASK 0x1800 /* IN2_DMIC_SUP - [12:11] */ #define ARIZONA_IN2_DMIC_SUP_SHIFT 11 /* IN2_DMIC_SUP - [12:11] */ #define ARIZONA_IN2_DMIC_SUP_WIDTH 2 /* IN2_DMIC_SUP - [12:11] */ -#define ARIZONA_IN2_MODE_MASK 0x0600 /* IN2_MODE - [10:9] */ -#define ARIZONA_IN2_MODE_SHIFT 9 /* IN2_MODE - [10:9] */ -#define ARIZONA_IN2_MODE_WIDTH 2 /* IN2_MODE - [10:9] */ +#define ARIZONA_IN2_MODE_MASK 0x0400 /* IN2_MODE - [10] */ +#define ARIZONA_IN2_MODE_SHIFT 10 /* IN2_MODE - [10] */ +#define ARIZONA_IN2_MODE_WIDTH 1 /* IN2_MODE - [10] */ +#define ARIZONA_IN2_SINGLE_ENDED_MASK 0x0200 /* IN2_MODE - [9] */ +#define ARIZONA_IN2_SINGLE_ENDED_SHIFT 9 /* IN2_MODE - [9] */ +#define ARIZONA_IN2_SINGLE_ENDED_WIDTH 1 /* IN2_MODE - [9] */ #define ARIZONA_IN2L_PGA_VOL_MASK 0x00FE /* IN2L_PGA_VOL - [7:1] */ #define ARIZONA_IN2L_PGA_VOL_SHIFT 1 /* IN2L_PGA_VOL - [7:1] */ #define ARIZONA_IN2L_PGA_VOL_WIDTH 7 /* IN2L_PGA_VOL - [7:1] */ @@ -2661,9 +2667,12 @@ #define ARIZONA_IN3_DMIC_SUP_MASK 0x1800 /* IN3_DMIC_SUP - [12:11] */ #define ARIZONA_IN3_DMIC_SUP_SHIFT 11 /* IN3_DMIC_SUP - [12:11] */ #define ARIZONA_IN3_DMIC_SUP_WIDTH 2 /* IN3_DMIC_SUP - [12:11] */ -#define ARIZONA_IN3_MODE_MASK 0x0600 /* IN3_MODE - [10:9] */ -#define ARIZONA_IN3_MODE_SHIFT 9 /* IN3_MODE - [10:9] */ -#define ARIZONA_IN3_MODE_WIDTH 2 /* IN3_MODE - [10:9] */ +#define ARIZONA_IN3_MODE_MASK 0x0400 /* IN3_MODE - [10] */ +#define ARIZONA_IN3_MODE_SHIFT 10 /* IN3_MODE - [10] */ +#define ARIZONA_IN3_MODE_WIDTH 1 /* IN3_MODE - [10] */ +#define ARIZONA_IN3_SINGLE_ENDED_MASK 0x0200 /* IN3_MODE - [9] */ +#define ARIZONA_IN3_SINGLE_ENDED_SHIFT 9 /* IN3_MODE - [9] */ +#define ARIZONA_IN3_SINGLE_ENDED_WIDTH 1 /* IN3_MODE - [9] */ #define ARIZONA_IN3L_PGA_VOL_MASK 0x00FE /* IN3L_PGA_VOL - [7:1] */ #define ARIZONA_IN3L_PGA_VOL_SHIFT 1 /* IN3L_PGA_VOL - [7:1] */ #define ARIZONA_IN3L_PGA_VOL_WIDTH 7 /* IN3L_PGA_VOL - [7:1] */ -- cgit v1.2.3 From 7e2d67e94ab05f70d2a73d00283f46778386ca52 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 13 May 2015 16:52:25 +0100 Subject: mfd: arizona: Add stub for wm5102_patch() For the WM5102 there is a dependency in the core code on wm5102_patch() which only exists when CONFIG_MFD_WM5102 is defined. To avoid having to sprinkle #ifdefs around the code it is given an alternative empty stub version when CONFIG_MFD_WM5102 is deselected Signed-off-by: Richard Fitzgerald Signed-off-by: Lee Jones --- include/linux/mfd/arizona/core.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h index 847bdaf47f1d..2f434f4f79a1 100644 --- a/include/linux/mfd/arizona/core.h +++ b/include/linux/mfd/arizona/core.h @@ -154,7 +154,15 @@ int arizona_request_irq(struct arizona *arizona, int irq, char *name, void arizona_free_irq(struct arizona *arizona, int irq, void *data); int arizona_set_irq_wake(struct arizona *arizona, int irq, int on); +#ifdef CONFIG_MFD_WM5102 int wm5102_patch(struct arizona *arizona); +#else +static inline int wm5102_patch(struct arizona *arizona) +{ + return 0; +} +#endif + int wm5110_patch(struct arizona *arizona); int wm8997_patch(struct arizona *arizona); -- cgit v1.2.3 From 5104b7d7678b0029417f6ac08243773a77259ac6 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 17 Jun 2015 06:17:52 +0930 Subject: module: make perm const Change the struct kernel_param.perm field to a const, as it should never be changed. Signed-off-by: Dan Streetman Signed-off-by: Rusty Russell (cut from larger patch) --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 7e0079936396..ab5031453807 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -68,7 +68,7 @@ enum { struct kernel_param { const char *name; const struct kernel_param_ops *ops; - u16 perm; + const u16 perm; s8 level; u8 flags; union { -- cgit v1.2.3 From b51d23e4e9fea6f264d39535c2a62d1f51e7ccc3 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 17 Jun 2015 06:18:52 +0930 Subject: module: add per-module param_lock Add a "param_lock" mutex to each module, and update params.c to use the correct built-in or module mutex while locking kernel params. Remove the kparam_block_sysfs_r/w() macros, replace them with direct calls to kernel_param_[un]lock(module). The kernel param code currently uses a single mutex to protect modification of any and all kernel params. While this generally works, there is one specific problem with it; a module callback function cannot safely load another module, i.e. with request_module() or even with indirect calls such as crypto_has_alg(). If the module to be loaded has any of its params configured (e.g. with a /etc/modprobe.d/* config file), then the attempt will result in a deadlock between the first module param callback waiting for modprobe, and modprobe trying to lock the single kernel param mutex to set the new module's param. This fixes that by using per-module mutexes, so that each individual module is protected against concurrent changes in its own kernel params, but is not blocked by changes to other module params. All built-in modules continue to use the built-in mutex, since they will always be loaded at runtime and references (e.g. request_module(), crypto_has_alg()) to them will never cause load-time param changing. This also simplifies the interface used by modules to block sysfs access to their params; while there are currently functions to block and unblock sysfs param access which are split up by read and write and expect a single kernel param to be passed, their actual operation is identical and applies to all params, not just the one passed to them; they simply lock and unlock the global param mutex. They are replaced with direct calls to kernel_param_[un]lock(THIS_MODULE), which locks THIS_MODULE's param_lock, or if the module is built-in, it locks the built-in mutex. Suggested-by: Rusty Russell Signed-off-by: Dan Streetman Signed-off-by: Rusty Russell --- include/linux/module.h | 1 + include/linux/moduleparam.h | 61 +++++++-------------------------------------- 2 files changed, 10 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 4c1b02e1361d..6ba0e87fa804 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -240,6 +240,7 @@ struct module { unsigned int num_syms; /* Kernel parameters. */ + struct mutex param_lock; struct kernel_param *kp; unsigned int num_kp; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index ab5031453807..f1fdc50520d8 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,6 +67,7 @@ enum { struct kernel_param { const char *name; + struct module *mod; const struct kernel_param_ops *ops; const u16 perm; s8 level; @@ -108,7 +109,7 @@ struct kparam_array * * @perm is 0 if the the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it - * is writable, you may need to use kparam_block_sysfs_write() around + * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). * * The @type is simply pasted to refer to a param_ops_##type and a @@ -216,12 +217,12 @@ struct kparam_array parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? */ \ - static const char __param_str_##name[] = prefix #name; \ + static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, flags, { arg } } + = { __param_str_##name, THIS_MODULE, ops, \ + VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ @@ -238,58 +239,14 @@ __check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) return 0; } -/** - * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs. - * @name: the name of the parameter - * - * There's no point blocking write on a paramter that isn't writable via sysfs! - */ -#define kparam_block_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again. - * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_unlock(); \ - } while (0) - -/** - * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs. - * @name: the name of the parameter - * - * This also blocks sysfs writes. - */ -#define kparam_block_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_read - allows sysfs to read a parameter again. - * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_unlock(); \ - } while (0) - #ifdef CONFIG_SYSFS -extern void __kernel_param_lock(void); -extern void __kernel_param_unlock(void); +extern void kernel_param_lock(struct module *mod); +extern void kernel_param_unlock(struct module *mod); #else -static inline void __kernel_param_lock(void) +static inline void kernel_param_lock(struct module *mod) { } -static inline void __kernel_param_unlock(void) +static inline void kernel_param_unlock(struct module *mod) { } #endif -- cgit v1.2.3 From dfe816c5e37272f2f3c1311f0e9934e1b4229261 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 19 Jun 2015 19:47:53 +0530 Subject: macvtap: Increase limit of macvtap queues Macvtap should be compatible with tuntap for maximum number of queues. commit 'baf71c5c1f80d82e92924050a60b5baaf97e3094 (tuntap: Increase the number of queues in tun.)' removes the limitations and increases number of queues in tuntap. Now, Its safe to increase number of queues in Macvtap as well. This patch also modifies 'macvtap_del_queues' function to avoid extra memory allocation in stack. Changes from v1->v2 : Michael S. Tsirkin, Jason Wang : Better way to use linked list to avoid use of extra memory in stack. Sergei Shtylyov : Specify dependent commit's summary. Signed-off-by: Pankaj Gupta Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 6f6929ea8a0c..a4ccc3122f93 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -29,7 +29,7 @@ struct macvtap_queue; * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. */ -#define MAX_MACVTAP_QUEUES 16 +#define MAX_MACVTAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) -- cgit v1.2.3 From 7d4f8d871ab15bd50a5771382ca2c9355b38d73c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 22 Jun 2015 00:27:17 -0700 Subject: switchdev; add VLAN support for port's bridge_getlink One more missing piece of the puzzle. Add vlan dump support to switchdev port's bridge_getlink. iproute2 "bridge vlan show" cmd already knows how to show the vlans installed on the bridge and the device , but (until now) no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK msg. Before this patch, "bridge vlan show": $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlans 57 sw1p1 << device side vlans (missing) sw1p2 57 sw1p2 sw1p3 sw1p4 br0 None (When the port is bridged, the output repeats the vlan list for the vlans on the bridge side of the port and the vlans on the device side of the port. The listing above show no vlans for the device side even though they are installed). After this patch: $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlan 57 sw1p1 30-34 << device side vlans 57 3840 PVID sw1p2 57 sw1p2 57 3840 PVID sw1p3 3842 PVID sw1p4 3843 PVID br0 None I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func. switchdev support adds an obj dump for VLAN objects, using the same call-back scheme as FDB dump. Support included for both compressed and un-compressed vlan dumps. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a2324fb45cf4..39adaa9529eb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -114,5 +114,9 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask, int nlflags); + u32 flags, u32 mask, int nlflags, + u32 filter_mask, + int (*vlan_fill)(struct sk_buff *skb, + struct net_device *dev, + u32 filter_mask)); #endif /* __LINUX_RTNETLINK_H */ -- cgit v1.2.3 From cca0ba2df3d4000bacd9b7807d46ffafac62d53a Mon Sep 17 00:00:00 2001 From: Hyungwon Hwang Date: Thu, 28 May 2015 16:25:15 +0900 Subject: backlight: Change the return type of backlight_update_status() to int Backlight device returns the result of update_status(), but backlight_update_status() ignores it. So the consumers cannot confirm the result of their function call. This patch makes the result to be returned back for consumers. Signed-off-by: Hyungwon Hwang Acked-by: Jingoo Han Signed-off-by: Lee Jones --- include/linux/backlight.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index adb14a8616df..1e7a69adbe6f 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -117,12 +117,16 @@ struct backlight_device { int use_count; }; -static inline void backlight_update_status(struct backlight_device *bd) +static inline int backlight_update_status(struct backlight_device *bd) { + int ret = -ENOENT; + mutex_lock(&bd->update_lock); if (bd->ops && bd->ops->update_status) - bd->ops->update_status(bd); + ret = bd->ops->update_status(bd); mutex_unlock(&bd->update_lock); + + return ret; } extern struct backlight_device *backlight_device_register(const char *name, -- cgit v1.2.3 From ce16b9d2356125eb791bd920c710b8512eecce54 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 17 Jun 2015 11:53:53 -0500 Subject: of: define of_find_node_by_phandle for !CONFIG_OF Define stub implementation for of_find_node_by_phandle() API so that users of this API can build properly even when CONFIG_OF is not defined. Fixes x86 randconfig build failure of remoteproc. Signed-off-by: Suman Anna [robh: add details on fixing remoteproc compile] Signed-off-by: Rob Herring --- include/linux/of.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index a3eb9d1e5800..54f858798e8c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -422,6 +422,11 @@ static inline struct device_node *of_find_node_opts_by_path(const char *path, return NULL; } +static inline struct device_node *of_find_node_by_phandle(phandle handle) +{ + return NULL; +} + static inline struct device_node *of_get_parent(const struct device_node *node) { return NULL; -- cgit v1.2.3 From 9bf39ab2adafd7cf8740859cb49e7b7952813a5d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:29:13 +0200 Subject: vfs: add file_path() helper Turn d_path(&file->f_path, ...); into file_path(file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2bd77e10e8e5..2c135ad741a9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2500,6 +2500,8 @@ extern struct file * open_exec(const char *); extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); +extern char *file_path(struct file *, char *, int); + #include /* needed for stackable file system support */ -- cgit v1.2.3 From 2726d56620ce71f40dd583d51391b86e1ab8cc57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:30:28 +0200 Subject: vfs: add seq_file_path() helper Turn seq_path(..., &file->f_path, ...); into seq_file_path(..., file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c77..912a7c482649 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,7 @@ __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); +int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); -- cgit v1.2.3 From 5fa8e0a1c6a762857ae67d1628c58b9a02362003 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:53 +0200 Subject: fs: Rename file_remove_suid() to file_remove_privs() file_remove_suid() is a misnomer since it removes also file capabilities stored in xattrs and sets S_NOSEC flag. Also should_remove_suid() tells something else than whether file_remove_suid() call is necessary which leads to bugs. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c135ad741a9..641e68d850cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2553,7 +2553,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); +extern int file_remove_privs(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From dbfae0cdcd87602737101d4417811f4323156b54 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:54 +0200 Subject: fs: Provide function telling whether file_remove_privs() will do anything Provide function telling whether file_remove_privs() will do anything. Currently we only have should_remove_suid() and that does something slightly different. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 641e68d850cf..ee60e8ab210f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,6 +2554,7 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); +extern int file_needs_remove_privs(struct file *file); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From 45f147a1bc97c743c6101a8d2741c69a51f583e4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:55 +0200 Subject: fs: Call security_ops->inode_killpriv on truncate Comment in include/linux/security.h says that ->inode_killpriv() should be called when setuid bit is being removed and that similar security labels (in fact this applies only to file capabilities) should be removed at this time as well. However we don't call ->inode_killpriv() when we remove suid bit on truncate. We fix the problem by calling ->inode_need_killpriv() and subsequently ->inode_killpriv() on truncate the same way as we do it on file write. After this patch there's only one user of should_remove_suid() - ocfs2 - and indeed it's buggy because it doesn't call ->inode_killpriv() on write. However fixing it is difficult because of special locking constraints. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ee60e8ab210f..1e658b11c265 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,7 +2554,11 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); -extern int file_needs_remove_privs(struct file *file); +extern int dentry_needs_remove_privs(struct dentry *dentry); +static inline int file_needs_remove_privs(struct file *file) +{ + return dentry_needs_remove_privs(file->f_path.dentry); +} extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From b57c2cb9ea1a02c2ae08e16de8c20cc13ffbf85a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 24 May 2015 17:19:41 +0200 Subject: pagemap.h: move dir_pages() over there That function was declared in a lot of filesystems to calculate directory pages. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- include/linux/pagemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3736f7065c..808942d31062 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -670,4 +670,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +static inline unsigned long dir_pages(struct inode *inode) +{ + return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; +} + #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From dc3f4198eac14e52a98dfc79cd84b45e280f59cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 May 2015 10:10:34 -0400 Subject: make simple_positive() public Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 167ec0934049..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -507,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } +static inline int simple_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) -- cgit v1.2.3 From 38183b9c31cf21d8996d6eee2e3a14508b20c418 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 28 May 2015 17:20:58 +1000 Subject: rcu: merge fix for Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE() This mirrors the change introduced by 7d0ae8086b8 of same title in Linus' tree; it's not obvious as a merge resolution since we moved the function. Signed-off-by: Stephen Rothwell Signed-off-by: Rusty Russell --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index eae42c21d5fd..52bdec710ed7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -467,7 +467,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s */ #define lockless_dereference(p) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) -- cgit v1.2.3 From ce0bdb849ad46e4b4e4cae6913b447ae9938bdcf Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 23 Jun 2015 16:06:44 +0900 Subject: of: fix a build error to of_graph_get_endpoint_by_regs function This patch fixes the below build error reported by Stephen, Stephen reported: After merging the drm-exynos tree, today's linux-next build (x86_64 allmodconfig) failed like this: drivers/media/i2c/adv7604.o: In function `of_graph_get_endpoint_by_regs': adv7604.c:(.text+0x586c): multiple definition of `of_graph_get_endpoint_by_regs' drivers/media/i2c/adv7343.o:adv7343.c:(.text+0xa13): first defined here drivers/media/platform/soc_camera/atmel-isi.o: In function `of_graph_get_endpoint_by_regs': atmel-isi.c:(.text+0x1ec9): multiple definition of `of_graph_get_endpoint_by_regs' drivers/media/platform/soc_camera/soc_camera.o:soc_camera.c:(.text+0x2ce3): first defined here drivers/media/platform/soc_camera/rcar_vin.o: In function `of_graph_get_endpoint_by_regs': rcar_vin.c:(.text+0x307c): multiple definition of `of_graph_get_endpoint_by_regs' drivers/media/platform/soc_camera/soc_camera.o:soc_camera.c:(.text+0x2ce3): first defined here Caused by commit: a0f7001c18ca ("of: add helper for getting endpoint node of specific identifiers") To fix the error, this patch declares of_graph_get_endpoint_by_regs function with "static inline". Signed-off-by: Inki Dae Signed-off-by: Dave Airlie --- include/linux/of_graph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 1c1d5b901a72..f8bcd0e21a26 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -71,7 +71,7 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } -struct device_node *of_graph_get_endpoint_by_regs( +static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { return NULL; -- cgit v1.2.3 From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:37 -0400 Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'. net.ipv4.conf.all.ignore_routes_with_linkdown = 0 net.ipv4.conf.default.ignore_routes_with_linkdown = 0 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared. With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down): default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15 cache While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down. Now interface p8p1 is linked-up: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2 90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 dev p8p1 src 80.0.0.1 cache and the output changes to what one would expect. If the sysctl is not set, the following output would be expected when p8p1 is down: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down. v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl. v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix. v4: Drop binary sysctl v5: Whitespace fixups from Dave v6: Style changes from Dave and checkpatch suggestions v7: One more checkpatch fixup Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0a21fbefdfbe..a4328cea376a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) +#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ + IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) -- cgit v1.2.3 From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c959e7eb5bd4..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From c181fb3e723351e2f7a1f76b6c0627a4b8ad1723 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 22 Jun 2015 22:38:53 +0200 Subject: ACPI / OF: Rename of_node() and acpi_node() to to_of_node() and to_acpi_node() Commit 8a0662d9 introduced of_node and acpi_node symbols in global namespace but there were already ~63 of_node local variables or function parameters (no single acpi_node though, but anyway). After debugging undefined but used of_node local varible (which turned out to reference static function of_node() instead) it became clear that the names for the functions are too short and too generic for global scope. Signed-off-by: Alexander Sverdlin Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- include/linux/of.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..ec3c98ed9dca 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -53,7 +53,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode) +#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) @@ -473,7 +473,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return false; } -static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode) +static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/of.h b/include/linux/of.h index b871ff9d81d7..f05fdcea4e66 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -128,7 +128,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_OF; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return fwnode ? container_of(fwnode, struct device_node, fwnode) : NULL; } @@ -387,7 +387,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return false; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return NULL; } -- cgit v1.2.3 From 9200025724619d83f9fc366281f0bde36afe6e5a Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Fri, 12 Jun 2015 10:04:10 +0800 Subject: rtc: Introduce rtc_tm_sub() helper function There're many sites need comparing the two rtc_time variants for many rtc drivers, especially in the instances of rtc_class_ops::set_alarm(). So add this common helper function to make things easy. Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 587017e7939c..b36160321458 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -24,6 +24,14 @@ extern void rtc_time64_to_tm(time64_t time, struct rtc_time *tm); ktime_t rtc_tm_to_ktime(struct rtc_time tm); struct rtc_time rtc_ktime_to_tm(ktime_t kt); +/* + * rtc_tm_sub - Return the difference in seconds. + */ +static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) +{ + return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); +} + /** * Deprecated. Use rtc_time64_to_tm(). */ -- cgit v1.2.3 From c86a6c28957a9e8e9a71582a32e96971ad411ffe Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Fri, 12 Jun 2015 11:10:18 +0800 Subject: rtc: interface: Remove rtc_set_mmss() Now rtc_set_mmss() has no users, just remove it. We still have rtc_set_time() doing similar things. Signed-off-by: Xunlei Pang Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b36160321458..3359f0422c6b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -168,7 +168,6 @@ extern void devm_rtc_device_unregister(struct device *dev, extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); extern int rtc_set_ntp_time(struct timespec64 now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, -- cgit v1.2.3 From c3cddc4c296c3a1498df7181015cbcea178a0546 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 24 Jun 2015 16:54:45 -0700 Subject: fsnotify: remove obsolete documentation should_send_event is no longer part of struct fsnotify_ops, so remove it. Signed-off-by: Nikolay Borisov Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsnotify_backend.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0f313f93c586..65a517dd32f7 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -84,8 +84,6 @@ struct fsnotify_fname; * Each group much define these ops. The fsnotify infrastructure will call * these operations for each relevant group. * - * should_send_event - given a group, inode, and mask this function determines - * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group -- cgit v1.2.3 From 5286d20c4eb7b0c29217f8756652609df74f5489 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 24 Jun 2015 16:54:51 -0700 Subject: configfs: unexport/make static config_item_init() config_item_init() is only used in item.c Signed-off-by: Fabian Frederick Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 34025df61829..c9e5c57e4edf 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -71,7 +71,6 @@ static inline char *config_item_name(struct config_item * item) return item->ci_name; } -extern void config_item_init(struct config_item *); extern void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type); -- cgit v1.2.3 From b5242e98c1cb834feb1e84026f09a4796b49eb4d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Jun 2015 16:55:42 -0700 Subject: smpboot: allow excluding cpus from the smpboot threads This patch series allows the watchdog to run by default only on the housekeeping cores when nohz_full is in effect; this seems to be a good compromise short of turning it off completely (since the nohz_full cores can't tolerate a watchdog). To provide customizability, we add /proc/sys/kernel/watchdog_cpumask so that the set of cores running the watchdog can be tuned to different values after bootup. To implement this customizability, we add a new smpboot_update_cpumask_percpu_thread() API to the smpboot_thread subsystem that lets us park or unpark "unwanted" threads. And now that threads can be parked for long periods of time, we tweak the /proc//stat and /proc//status code so parked threads aren't reported as running, which is otherwise confusing. This patch (of 3): This change allows some cores to be excluded from running the smp_hotplug_thread tasks. The following commit to update kernel/watchdog.c to use this functionality is the motivating example, and more information on the motivation is provided there. A new smp_hotplug_thread field is introduced, "cpumask", which is cpumask field managed by the smpboot subsystem that indicates whether or not the given smp_hotplug_thread should run on that core; the cpumask is checked when deciding whether to unpark the thread. To limit the cpumask to less than cpu_possible, you must call smpboot_update_cpumask_percpu_thread() after registering. Signed-off-by: Chris Metcalf Cc: Don Zickus Cc: Ingo Molnar Cc: Ulrich Obergfell Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smpboot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index d600afb21926..da3c593f9845 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -27,6 +27,8 @@ struct smpboot_thread_data; * @pre_unpark: Optional unpark function, called before the thread is * unparked (cpu online). This is not guaranteed to be * called on the target cpu of the thread. Careful! + * @cpumask: Internal state. To update which threads are unparked, + * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -41,11 +43,14 @@ struct smp_hotplug_thread { void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); void (*pre_unpark)(unsigned int cpu); + cpumask_var_t cpumask; bool selfparking; const char *thread_comm; }; int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); +int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *); #endif -- cgit v1.2.3 From fe4ba3c34352b7e8068b7f18eb233444aed17011 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Jun 2015 16:55:45 -0700 Subject: watchdog: add watchdog_cpumask sysctl to assist nohz Change the default behavior of watchdog so it only runs on the housekeeping cores when nohz_full is enabled at build and boot time. Allow modifying the set of cores the watchdog is currently running on with a new kernel.watchdog_cpumask sysctl. In the current system, the watchdog subsystem runs a periodic timer that schedules the watchdog kthread to run. However, nohz_full cores are designed to allow userspace application code running on those cores to have 100% access to the CPU. So the watchdog system prevents the nohz_full application code from being able to run the way it wants to, thus the motivation to suppress the watchdog on nohz_full cores, which this patchset provides by default. However, if we disable the watchdog globally, then the housekeeping cores can't benefit from the watchdog functionality. So we allow disabling it only on some cores. See Documentation/lockup-watchdogs.txt for more information. [jhubbard@nvidia.com: fix a watchdog crash in some configurations] Signed-off-by: Chris Metcalf Acked-by: Don Zickus Cc: Ingo Molnar Cc: Ulrich Obergfell Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 3d46fb4708e0..f94da0e65dea 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -67,6 +67,7 @@ extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; +extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , @@ -77,6 +78,8 @@ extern int proc_soft_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); +extern int proc_watchdog_cpumask(struct ctl_table *, int, + void __user *, size_t *, loff_t *); #endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI -- cgit v1.2.3 From 4066c33d0308f87e9a3b0c7fafb9141c0bfbfa77 Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Wed, 24 Jun 2015 16:55:54 -0700 Subject: mm/slab_common: support the slub_debug boot option on specific object size The slub_debug=PU,kmalloc-xx cannot work because in the create_kmalloc_caches() the s->name is created after the create_kmalloc_cache() is called. The name is NULL in the create_kmalloc_cache() so the kmem_cache_flags() would not set the slub_debug flags to the s->flags. The fix here set up a kmalloc_names string array for the initialization purpose and delete the dynamic name creation of kmalloc_caches. [akpm@linux-foundation.org: s/kmalloc_names/kmalloc_info/, tweak comment text] Signed-off-by: Gavin Guo Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index ffd24c830151..96f0ea506b5c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -153,8 +153,30 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) +/* + * The KMALLOC_LOOP_LOW is the definition for the for loop index start number + * to create the kmalloc_caches object in create_kmalloc_caches(). The first + * and the second are 96 and 192. You can see that in the kmalloc_index(), if + * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64, + * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't + * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW. + */ +#if KMALLOC_MIN_SIZE <= 32 +#define KMALLOC_LOOP_LOW 1 +#elif KMALLOC_MIN_SIZE <= 64 +#define KMALLOC_LOOP_LOW 2 +#else +#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW +#endif + #else #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +/* + * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used. + * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be + * initialized. + */ +#define KMALLOC_LOOP_LOW 1 #endif /* -- cgit v1.2.3 From 1ed58b6051b67e5cfe9e465fb60bf7d5f55e0a64 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 24 Jun 2015 16:55:59 -0700 Subject: linux/slab.h: fix three off-by-one typos in comment The first is a keyboard-off-by-one, the other two the ordinary mathy kind. Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 96f0ea506b5c..9de2fdc8b5e4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -262,8 +262,8 @@ extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; * belongs to. * 0 = zero alloc * 1 = 65 .. 96 bytes - * 2 = 120 .. 192 bytes - * n = 2^(n-1) .. 2^n -1 + * 2 = 129 .. 192 bytes + * n = 2^(n-1)+1 .. 2^n */ static __always_inline int kmalloc_index(size_t size) { -- cgit v1.2.3 From 2ae416b142b625c58c9ccb039aa3ef48ad0e9bae Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 24 Jun 2015 16:56:16 -0700 Subject: mm: new mm hook framework CRIU is recreating the process memory layout by remapping the checkpointee memory area on top of the current process (criu). This includes remapping the vDSO to the place it has at checkpoint time. However some architectures like powerpc are keeping a reference to the vDSO base address to build the signal return stack frame by calling the vDSO sigreturn service. So once the vDSO has been moved, this reference is no more valid and the signal frame built later are not usable. This patch serie is introducing a new mm hook framework, and a new arch_remap hook which is called when mremap is done and the mm lock still hold. The next patch is adding the vDSO remap and unmap tracking to the powerpc architecture. This patch (of 3): This patch introduces a new set of header file to manage mm hooks: - per architecture empty header file (arch/x/include/asm/mm-arch-hooks.h) - a generic header (include/linux/mm-arch-hooks.h) The architecture which need to overwrite a hook as to redefine it in its header file, while architecture which doesn't need have nothing to do. The default hooks are defined in the generic header and are used in the case the architecture is not defining it. In a next step, mm hooks defined in include/asm-generic/mm_hooks.h should be moved here. Signed-off-by: Laurent Dufour Suggested-by: Andrew Morton Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm-arch-hooks.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/mm-arch-hooks.h (limited to 'include/linux') diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h new file mode 100644 index 000000000000..63005e367abd --- /dev/null +++ b/include/linux/mm-arch-hooks.h @@ -0,0 +1,16 @@ +/* + * Generic mm no-op hooks. + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_MM_ARCH_HOOKS_H +#define _LINUX_MM_ARCH_HOOKS_H + +#include + +#endif /* _LINUX_MM_ARCH_HOOKS_H */ -- cgit v1.2.3 From 4abad2ca4a4dbdd4a218c12451231ab628f2e60c Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 24 Jun 2015 16:56:19 -0700 Subject: mm: new arch_remap() hook Some architectures would like to be triggered when a memory area is moved through the mremap system call. This patch introduces a new arch_remap() mm hook which is placed in the path of mremap, and is called before the old area is unmapped (and the arch_unmap() hook is called). Signed-off-by: Laurent Dufour Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm-arch-hooks.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h index 63005e367abd..4efc3f56e6df 100644 --- a/include/linux/mm-arch-hooks.h +++ b/include/linux/mm-arch-hooks.h @@ -13,4 +13,13 @@ #include +#ifndef arch_remap +static inline void arch_remap(struct mm_struct *mm, + unsigned long old_start, unsigned long old_end, + unsigned long new_start, unsigned long new_end) +{ +} +#define arch_remap arch_remap +#endif + #endif /* _LINUX_MM_ARCH_HOOKS_H */ -- cgit v1.2.3 From a9919c79359df9a706ff9e14fc0a93cd15791c9b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 24 Jun 2015 16:56:28 -0700 Subject: mm: only define hashdist variable when needed For !CONFIG_NUMA, hashdist will always be 0, since it's setter is otherwise compiled out. So we can save 4 bytes of data and some .text (although mostly in __init functions) by only defining it for CONFIG_NUMA. Signed-off-by: Rasmus Villemoes Acked-by: David Rientjes Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 0995c2de8162..f589222bfa87 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -357,12 +357,12 @@ extern void *alloc_large_system_hash(const char *tablename, /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. */ -#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT) -#define HASHDIST_DEFAULT 1 +#ifdef CONFIG_NUMA +#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) +extern int hashdist; /* Distribute hashes across NUMA nodes? */ #else -#define HASHDIST_DEFAULT 0 +#define hashdist (0) #endif -extern int hashdist; /* Distribute hashes across NUMA nodes? */ #endif /* _LINUX_BOOTMEM_H */ -- cgit v1.2.3 From c761471b58e6138938ebc6eafec20b2f60cb3397 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Jun 2015 16:56:33 -0700 Subject: mm: avoid tail page refcounting on non-THP compound pages Reintroduce 8d63d99a5dfb ("mm: avoid tail page refcounting on non-THP compound pages") after removing bogus VM_BUG_ON_PAGE() in put_unrefcounted_compound_page(). THP uses tail page refcounting to be able to split huge pages at any time. Tail page refcounting is not needed for other users of compound pages and it's harmful because of overhead. We try to exclude non-THP pages from tail page refcounting using __compound_tail_refcounted() check. It excludes most common non-THP compound pages: SL*B and hugetlb, but it doesn't catch rest of __GFP_COMP users -- drivers. And it's not only about overhead. Drivers might want to use compound pages to get refcounting semantics suitable for mapping high-order pages to userspace. But tail page refcounting breaks it. Tail page refcounting uses ->_mapcount in tail pages to store GUP pins on them. It means GUP pins would affect page_mapcount() for tail pages. It's not a problem for THP, because it never maps tail pages. But unlike THP, drivers map parts of compound pages with PTEs and it makes page_mapcount() be called for tail pages. In particular, GUP pins would shift PSS up and affect /proc/kpagecount for such pages. But, I'm not aware about anything which can lead to crash or other serious misbehaviour. Since currently all THP pages are anonymous and all drivers pages are not, we can fix the __compound_tail_refcounted() check by requiring PageAnon() to enable tail page refcounting. Signed-off-by: Kirill A. Shutemov Acked-by: Hugh Dickins Reviewed-by: Andrea Arcangeli Reported-by: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0755b9fd03a7..8b086070c3a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -499,7 +499,7 @@ static inline int page_count(struct page *page) static inline bool __compound_tail_refcounted(struct page *page) { - return !PageSlab(page) && !PageHeadHuge(page); + return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page); } /* -- cgit v1.2.3 From ead07f6a867b5b1b41cf703735e8b39094987a7d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 24 Jun 2015 16:56:48 -0700 Subject: mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling memory_failure() can run in 2 different mode (specified by MF_COUNT_INCREASED) in page refcount perspective. When MF_COUNT_INCREASED is set, memory_failure() assumes that the caller takes a refcount of the target page. And if cleared, memory_failure() takes it in it's own. In current code, however, refcounting is done differently in each caller. For example, madvise_hwpoison() uses get_user_pages_fast() and hwpoison_inject() uses get_page_unless_zero(). So this inconsistent refcounting causes refcount failure especially for thp tail pages. Typical user visible effects are like memory leak or VM_BUG_ON_PAGE(!page_count(page)) in isolate_lru_page(). To fix this refcounting issue, this patch introduces get_hwpoison_page() to handle thp tail pages in the same manner for each caller of hwpoison code. memory_failure() might fail to split thp and in such case it returns without completing page isolation. This is not good because PageHWPoison on the thp is still set and there's no easy way to unpoison such thps. So this patch try to roll back any action to the thp in "non anonymous thp" case and "thp split failed" case, expecting an MCE(SRAR) generated by later access afterward will properly free such thps. [akpm@linux-foundation.org: fix CONFIG_HWPOISON_INJECT=m] Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Tony Luck Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b086070c3a5..cd9df66138a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2146,6 +2146,7 @@ enum mf_flags { extern int memory_failure(unsigned long pfn, int trapno, int flags); extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); +extern int get_hwpoison_page(struct page *page); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); -- cgit v1.2.3 From 16e951966f05da5ccd650104176f6ba289f7fa20 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 24 Jun 2015 16:57:07 -0700 Subject: mm: oom_kill: clean up victim marking and exiting interfaces Rename unmark_oom_victim() to exit_oom_victim(). Marking and unmarking are related in functionality, but the interface is not symmetrical at all: one is an internal OOM killer function used during the killing, the other is for an OOM victim to signal its own death on exit later on. This has locking implications, see follow-up changes. While at it, rename mark_tsk_oom_victim() to mark_oom_victim(), which is easier on the eye. Signed-off-by: Johannes Weiner Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Andrea Arcangeli Cc: Dave Chinner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 44b2f6f7bbd8..a8e6a498cbcb 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -47,9 +47,7 @@ static inline bool oom_task_origin(const struct task_struct *p) return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); } -extern void mark_tsk_oom_victim(struct task_struct *tsk); - -extern void unmark_oom_victim(void); +extern void mark_oom_victim(struct task_struct *tsk); extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, @@ -75,6 +73,9 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); + +extern void exit_oom_victim(void); + extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); -- cgit v1.2.3 From dc56401fc9f25e8f93899991ec858c98a331d88c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 24 Jun 2015 16:57:19 -0700 Subject: mm: oom_kill: simplify OOM killer locking The zonelist locking and the oom_sem are two overlapping locks that are used to serialize global OOM killing against different things. The historical zonelist locking serializes OOM kills from allocations with overlapping zonelists against each other to prevent killing more tasks than necessary in the same memory domain. Only when neither tasklists nor zonelists from two concurrent OOM kills overlap (tasks in separate memcgs bound to separate nodes) are OOM kills allowed to execute in parallel. The younger oom_sem is a read-write lock to serialize OOM killing against the PM code trying to disable the OOM killer altogether. However, the OOM killer is a fairly cold error path, there is really no reason to optimize for highly performant and concurrent OOM kills. And the oom_sem is just flat-out redundant. Replace both locking schemes with a single global mutex serializing OOM kills regardless of context. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: David Rientjes Cc: Tetsuo Handa Cc: Andrea Arcangeli Cc: Dave Chinner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index a8e6a498cbcb..7deecb7bca5e 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -32,6 +32,8 @@ enum oom_scan_t { /* Thread is the potential origin of an oom condition; kill first on oom */ #define OOM_FLAG_ORIGIN ((__force oom_flags_t)0x1) +extern struct mutex oom_lock; + static inline void set_current_oom_origin(void) { current->signal->oom_flags |= OOM_FLAG_ORIGIN; @@ -60,9 +62,6 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message); -extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags); -extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags); - extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, int order, const nodemask_t *nodemask, struct mem_cgroup *memcg); -- cgit v1.2.3 From cc637b1704d78b068c2eb700eec384c69ea56cdf Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Wed, 24 Jun 2015 16:57:30 -0700 Subject: memory-failure: export page_type and action result Export 'outcome' and 'action_page_type' to mm.h, so we could use this emnus outside. This patch is preparation for adding trace events for memory-failure recovery action. Signed-off-by: Xie XiuQi Acked-by: Naoya Horiguchi Cc: Chen Gong Cc: Jim Davis Cc: Steven Rostedt Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cd9df66138a1..986a9a221ee0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2153,6 +2153,40 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); + +/* + * Error handlers for various types of pages. + */ +enum mf_outcome { + MF_IGNORED, /* Error: cannot be handled */ + MF_FAILED, /* Error: handling failed */ + MF_DELAYED, /* Will be handled later */ + MF_RECOVERED, /* Successfully recovered */ +}; + +enum mf_action_page_type { + MF_MSG_KERNEL, + MF_MSG_KERNEL_HIGH_ORDER, + MF_MSG_SLAB, + MF_MSG_DIFFERENT_COMPOUND, + MF_MSG_POISONED_HUGE, + MF_MSG_HUGE, + MF_MSG_FREE_HUGE, + MF_MSG_UNMAP_FAILED, + MF_MSG_DIRTY_SWAPCACHE, + MF_MSG_CLEAN_SWAPCACHE, + MF_MSG_DIRTY_MLOCKED_LRU, + MF_MSG_CLEAN_MLOCKED_LRU, + MF_MSG_DIRTY_UNEVICTABLE_LRU, + MF_MSG_CLEAN_UNEVICTABLE_LRU, + MF_MSG_DIRTY_LRU, + MF_MSG_CLEAN_LRU, + MF_MSG_TRUNCATED_LRU, + MF_MSG_BUDDY, + MF_MSG_BUDDY_2ND, + MF_MSG_UNKNOWN, +}; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr, -- cgit v1.2.3 From cc3e2af42e7b7e0457b93bf17c19b44c635cd40c Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Wed, 24 Jun 2015 16:57:33 -0700 Subject: memory-failure: change type of action_result's param 3 to enum Change type of action_result's param 3 to enum for type consistency, and rename mf_outcome to mf_result for clearly. Signed-off-by: Xie XiuQi Acked-by: Naoya Horiguchi Cc: Chen Gong Cc: Jim Davis Cc: Steven Rostedt Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 986a9a221ee0..24ad583596d1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2157,7 +2157,7 @@ extern int soft_offline_page(struct page *page, int flags); /* * Error handlers for various types of pages. */ -enum mf_outcome { +enum mf_result { MF_IGNORED, /* Error: cannot be handled */ MF_FAILED, /* Error: handling failed */ MF_DELAYED, /* Will be handled later */ -- cgit v1.2.3 From 8809aa2d28d74111ff2f1928edaa4e9845c97a7d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 24 Jun 2015 16:57:44 -0700 Subject: mm: clarify that the function operates on hugepage pte We have confusing functions to clear pmd, pmd_clear_* and pmd_clear. Add _huge_ to pmdp_clear functions so that we are clear that they operate on hugepage pte. We don't bother about other functions like pmdp_set_wrprotect, pmdp_clear_flush_young, because they operate on PTE bits and hence indicate they are operating on hugepage ptes Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andrea Arcangeli Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 95243d28a0ee..61cd67f4d788 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -324,25 +324,25 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pte; \ }) -#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \ +#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ struct mm_struct *___mm = (__vma)->vm_mm; \ pmd_t ___pmd; \ \ - ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \ + ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ mmu_notifier_invalidate_range(___mm, ___haddr, \ ___haddr + HPAGE_PMD_SIZE); \ \ ___pmd; \ }) -#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \ +#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ pmd_t ___pmd; \ \ - ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \ + ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ mmu_notifier_invalidate_range(__mm, ___haddr, \ ___haddr + HPAGE_PMD_SIZE); \ \ @@ -428,8 +428,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_clear_flush_notify pmdp_clear_flush -#define pmdp_get_and_clear_notify pmdp_get_and_clear +#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush +#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- cgit v1.2.3 From fc6daaf93151877748f8096af6b3fddb147f22d6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 24 Jun 2015 16:58:09 -0700 Subject: mm/memblock: add extra "flags" to memblock to allow selection of memory based on attribute Some high end Intel Xeon systems report uncorrectable memory errors as a recoverable machine check. Linux has included code for some time to process these and just signal the affected processes (or even recover completely if the error was in a read only page that can be replaced by reading from disk). But we have no recovery path for errors encountered during kernel code execution. Except for some very specific cases were are unlikely to ever be able to recover. Enter memory mirroring. Actually 3rd generation of memory mirroing. Gen1: All memory is mirrored Pro: No s/w enabling - h/w just gets good data from other side of the mirror Con: Halves effective memory capacity available to OS/applications Gen2: Partial memory mirror - just mirror memory begind some memory controllers Pro: Keep more of the capacity Con: Nightmare to enable. Have to choose between allocating from mirrored memory for safety vs. NUMA local memory for performance Gen3: Address range partial memory mirror - some mirror on each memory controller Pro: Can tune the amount of mirror and keep NUMA performance Con: I have to write memory management code to implement The current plan is just to use mirrored memory for kernel allocations. This has been broken into two phases: 1) This patch series - find the mirrored memory, use it for boot time allocations 2) Wade into mm/page_alloc.c and define a ZONE_MIRROR to pick up the unused mirrored memory from mm/memblock.c and only give it out to select kernel allocations (this is still being scoped because page_alloc.c is scary). This patch (of 3): Add extra "flags" to memblock to allow selection of memory based on attribute. No functional changes Signed-off-by: Tony Luck Cc: Xishi Qiu Cc: Hanjun Guo Cc: Xiexiuqi Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9497ec7c77ea..7aeec0cb4c27 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,7 +21,10 @@ #define INIT_PHYSMEM_REGIONS 4 /* Definition of memblock flags. */ -#define MEMBLOCK_HOTPLUG 0x1 /* hotpluggable region */ +enum { + MEMBLOCK_NONE = 0x0, /* No special request */ + MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ +}; struct memblock_region { phys_addr_t base; @@ -61,7 +64,7 @@ extern bool movable_node_enabled; phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, - int nid); + int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); @@ -85,11 +88,13 @@ int memblock_remove_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size); -void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a, +void __next_mem_range(u64 *idx, int nid, ulong flags, + struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a, +void __next_mem_range_rev(u64 *idx, int nid, ulong flags, + struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); @@ -100,16 +105,17 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a, * @type_a: ptr to memblock_type to iterate * @type_b: ptr to memblock_type which excludes from the iteration * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range(i, type_a, type_b, nid, \ +#define for_each_mem_range(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ - for (i = 0, __next_mem_range(&i, nid, type_a, type_b, \ + for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid); \ i != (u64)ULLONG_MAX; \ - __next_mem_range(&i, nid, type_a, type_b, \ + __next_mem_range(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) /** @@ -119,17 +125,18 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a, * @type_a: ptr to memblock_type to iterate * @type_b: ptr to memblock_type which excludes from the iteration * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range_rev(i, type_a, type_b, nid, \ +#define for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ - __next_mem_range_rev(&i, nid, type_a, type_b, \ + __next_mem_range_rev(&i, nid, flags, type_a, type_b,\ p_start, p_end, p_nid); \ i != (u64)ULLONG_MAX; \ - __next_mem_range_rev(&i, nid, type_a, type_b, \ + __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) #ifdef CONFIG_MOVABLE_NODE @@ -181,13 +188,14 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL + * @flags: pick from blocks based on memory attributes * * Walks over free (memory && !reserved) areas of memblock. Available as * soon as memblock is initialized. */ -#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \ +#define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \ for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ - nid, p_start, p_end, p_nid) + nid, flags, p_start, p_end, p_nid) /** * for_each_free_mem_range_reverse - rev-iterate through free memblock areas @@ -196,13 +204,15 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL + * @flags: pick from blocks based on memory attributes * * Walks over free (memory && !reserved) areas of memblock in reverse * order. Available as soon as memblock is initialized. */ -#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \ +#define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end, \ + p_nid) \ for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ - nid, p_start, p_end, p_nid) + nid, flags, p_start, p_end, p_nid) static inline void memblock_set_region_flags(struct memblock_region *r, unsigned long flags) @@ -273,7 +283,8 @@ static inline bool memblock_bottom_up(void) { return false; } #define MEMBLOCK_ALLOC_ACCESSIBLE 0 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, - phys_addr_t start, phys_addr_t end); + phys_addr_t start, phys_addr_t end, + ulong flags); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, -- cgit v1.2.3 From a3f5bafcc04aaf62990e0cf3ced1cc6d8dc6fe95 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 24 Jun 2015 16:58:12 -0700 Subject: mm/memblock: allocate boot time data structures from mirrored memory Try to allocate all boot time kernel data structures from mirrored memory. If we run out of mirrored memory print warnings, but fall back to using non-mirrored memory to make sure that we still boot. By number of bytes, most of what we allocate at boot time is the page structures. 64 bytes per 4K page on x86_64 ... or about 1.5% of total system memory. For workloads where the bulk of memory is allocated to applications this may represent a useful improvement to system availability since 1.5% of total memory might be a third of the memory allocated to the kernel. Signed-off-by: Tony Luck Cc: Xishi Qiu Cc: Hanjun Guo Cc: Xiexiuqi Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7aeec0cb4c27..0215ffd63069 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -24,6 +24,7 @@ enum { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ + MEMBLOCK_MIRROR = 0x2, /* mirrored region */ }; struct memblock_region { @@ -78,6 +79,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); +int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); +ulong choose_memblock_flags(void); /* Low level functions */ int memblock_add_range(struct memblock_type *type, @@ -160,6 +163,11 @@ static inline bool movable_node_is_enabled(void) } #endif +static inline bool memblock_is_mirror(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_MIRROR; +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); -- cgit v1.2.3 From b05b9f5f9dcf593a0e9327676b78e6c17b4218e8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 24 Jun 2015 16:58:15 -0700 Subject: x86, mirror: x86 enabling - find mirrored memory ranges UEFI GetMemoryMap() uses a new attribute bit to mark mirrored memory address ranges. See UEFI 2.5 spec pages 157-158: http://www.uefi.org/sites/default/files/resources/UEFI%202_5.pdf On EFI enabled systems scan the memory map and tell memblock about any mirrored ranges. Signed-off-by: Tony Luck Cc: Xishi Qiu Cc: Hanjun Guo Cc: Xiexiuqi Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Yinghai Lu Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/efi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2092965afca3..5f19efe4eb3f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -96,6 +96,8 @@ typedef struct { #define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ #define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ +#define EFI_MEMORY_MORE_RELIABLE \ + ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 @@ -868,6 +870,7 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos extern void efi_late_init(void); extern void efi_free_boot_services(void); extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); +extern void efi_find_mirror(void); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} -- cgit v1.2.3 From d1dc6f1bcf1e998e7ce65fc120da371ab047a999 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Wed, 24 Jun 2015 16:58:18 -0700 Subject: frontswap: allow multiple backends Change frontswap single pointer to a singly linked list of frontswap implementations. Update Xen tmem implementation as register no longer returns anything. Frontswap only keeps track of a single implementation; any implementation that registers second (or later) will replace the previously registered implementation, and gets a pointer to the previous implementation that the new implementation is expected to pass all frontswap functions to if it can't handle the function itself. However that method doesn't really make much sense, as passing that work on to every implementation adds unnecessary work to implementations; instead, frontswap should simply keep a list of all registered implementations and try each implementation for any function. Most importantly, neither of the two currently existing frontswap implementations in the kernel actually do anything with any previous frontswap implementation that they replace when registering. This allows frontswap to successfully manage multiple implementations by keeping a list of them all. Signed-off-by: Dan Streetman Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 8293262401de..e65ef959546c 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -6,16 +6,16 @@ #include struct frontswap_ops { - void (*init)(unsigned); - int (*store)(unsigned, pgoff_t, struct page *); - int (*load)(unsigned, pgoff_t, struct page *); - void (*invalidate_page)(unsigned, pgoff_t); - void (*invalidate_area)(unsigned); + void (*init)(unsigned); /* this swap type was just swapon'ed */ + int (*store)(unsigned, pgoff_t, struct page *); /* store a page */ + int (*load)(unsigned, pgoff_t, struct page *); /* load a page */ + void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */ + void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */ + struct frontswap_ops *next; /* private pointer to next ops */ }; extern bool frontswap_enabled; -extern struct frontswap_ops * - frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_register_ops(struct frontswap_ops *ops); extern void frontswap_shrink(unsigned long); extern unsigned long frontswap_curr_pages(void); extern void frontswap_writethrough(bool); -- cgit v1.2.3 From 8a8c35fadfaf55629a37ef1a8ead1b8fb32581d2 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 24 Jun 2015 16:58:51 -0700 Subject: mm: kmemleak_alloc_percpu() should follow the gfp from per_alloc() Beginning at commit d52d3997f843 ("ipv6: Create percpu rt6_info"), the following INFO splat is logged: =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-next-20150612 #1 Not tainted ------------------------------- kernel/sched/core.c:7318 Illegal context switch in RCU-bh read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 3 locks held by systemd/1: #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1f/0x40 #1: (rcu_read_lock_bh){......}, at: [] ipv6_add_addr+0x62/0x540 #2: (addrconf_hash_lock){+...+.}, at: [] ipv6_add_addr+0x184/0x540 stack backtrace: CPU: 0 PID: 1 Comm: systemd Not tainted 4.1.0-rc7-next-20150612 #1 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.20 04/17/2014 Call Trace: dump_stack+0x4c/0x6e lockdep_rcu_suspicious+0xe7/0x120 ___might_sleep+0x1d5/0x1f0 __might_sleep+0x4d/0x90 kmem_cache_alloc+0x47/0x250 create_object+0x39/0x2e0 kmemleak_alloc_percpu+0x61/0xe0 pcpu_alloc+0x370/0x630 Additional backtrace lines are truncated. In addition, the above splat is followed by several "BUG: sleeping function called from invalid context at mm/slub.c:1268" outputs. As suggested by Martin KaFai Lau, these are the clue to the fix. Routine kmemleak_alloc_percpu() always uses GFP_KERNEL for its allocations, whereas it should follow the gfp from its callers. Reviewed-by: Catalin Marinas Reviewed-by: Kamalesh Babulal Acked-by: Martin KaFai Lau Signed-off-by: Larry Finger Cc: Martin KaFai Lau Cc: Catalin Marinas Cc: Tejun Heo Cc: Christoph Lameter Cc: [3.18+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemleak.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index e705467ddb47..d0a1f99e24e3 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,7 +28,8 @@ extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) __ref; -extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref; +extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) __ref; extern void kmemleak_free(const void *ptr) __ref; extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; @@ -71,7 +72,8 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, gfp_t gfp) { } -static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) +static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, + gfp_t gfp) { } static inline void kmemleak_free(const void *ptr) -- cgit v1.2.3 From b94d5230d06eb930be82e67fb1a9a58271e78297 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 19 May 2015 22:54:31 -0400 Subject: libnvdimm, nfit: initial libnvdimm infrastructure and NFIT support A struct nvdimm_bus is the anchor device for registering nvdimm resources and interfaces, for example, a character control device, nvdimm devices, and I/O region devices. The ACPI NFIT (NVDIMM Firmware Interface Table) is one possible platform description for such non-volatile memory resources in a system. The nfit.ko driver attaches to the "ACPI0012" device that indicates the presence of the NFIT and parses the table to register a struct nvdimm_bus instance. Cc: Cc: Lv Zheng Cc: Robert Moore Cc: Rafael J. Wysocki Acked-by: Jeff Moyer Acked-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/libnvdimm.h (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h new file mode 100644 index 000000000000..2b3c63950c91 --- /dev/null +++ b/include/linux/libnvdimm.h @@ -0,0 +1,34 @@ +/* + * libnvdimm - Non-volatile-memory Devices Subsystem + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LIBNVDIMM_H__ +#define __LIBNVDIMM_H__ +struct nvdimm; +struct nvdimm_bus_descriptor; +typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len); + +struct nvdimm_bus_descriptor { + unsigned long dsm_mask; + char *provider_name; + ndctl_fn ndctl; +}; + +struct device; +struct nvdimm_bus; +struct nvdimm_bus *nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc); +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); +#endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 45def22c1fab85764646746ce38d45b2f3281fa5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 26 Apr 2015 19:26:48 -0400 Subject: libnvdimm: control character device and nvdimm_bus sysfs attributes The control device for a nvdimm_bus is registered as an "nd" class device. The expectation is that there will usually only be one "nd" bus registered under /sys/class/nd. However, we allow for the possibility of multiple buses and they will listed in discovery order as ndctl0...ndctlN. This character device hosts the ioctl for passing control messages. The initial command set has a 1:1 correlation with the commands listed in the by the "NFIT DSM Example" document [1], but this scheme is extensible to future command sets. Note, nd_ioctl() and the backing ->ndctl() implementation are defined in a subsequent patch. This is simply the initial registrations and sysfs attributes. [1]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Cc: Neil Brown Cc: Greg KH Cc: Cc: Robert Moore Cc: Rafael J. Wysocki Acked-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 2b3c63950c91..d375cdc4abd5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -14,6 +14,8 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ +extern struct attribute_group nvdimm_bus_attribute_group; + struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, @@ -21,14 +23,16 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, unsigned int buf_len); struct nvdimm_bus_descriptor { + const struct attribute_group **attr_groups; unsigned long dsm_mask; char *provider_name; ndctl_fn ndctl; }; struct device; -struct nvdimm_bus; struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); +struct nvdimm_bus *to_nvdimm_bus(struct device *dev); +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From e6dfb2de47768efe8cc37c9a1863d2aff81440fb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 25 Apr 2015 03:56:17 -0400 Subject: libnvdimm, nfit: dimm/memory-devices Enable nvdimm devices to be registered on a nvdimm_bus. The kernel assigned device id for nvdimm devicesis dynamic. If userspace needs a more static identifier it should consult a provider-specific attribute. In the case where NFIT is the provider, the 'nmemX/nfit/handle' or 'nmemX/nfit/serial' attributes may be used for this purpose. Cc: Neil Brown Cc: Cc: Greg KH Cc: Robert Moore Cc: Rafael J. Wysocki Acked-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index d375cdc4abd5..07787f0dd7de 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -14,6 +14,12 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ + +enum { + /* when a dimm supports both PMEM and BLK access a label is required */ + NDD_ALIASING = 1 << 0, +}; + extern struct attribute_group nvdimm_bus_attribute_group; struct nvdimm; @@ -34,5 +40,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); +struct nvdimm *to_nvdimm(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); +const char *nvdimm_name(struct nvdimm *nvdimm); +void *nvdimm_provider_data(struct nvdimm *nvdimm); +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 62232e45f4a265abb43f0acf16e58f5d0b6e1ec9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 8 Jun 2015 14:27:06 -0400 Subject: libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices Most discovery/configuration of the nvdimm-subsystem is done via sysfs attributes. However, some nvdimm_bus instances, particularly the ACPI.NFIT bus, define a small set of messages that can be passed to the platform. For convenience we derive the initial libnvdimm-ioctl command formats directly from the NFIT DSM Interface Example formats. ND_CMD_SMART: media health and diagnostics ND_CMD_GET_CONFIG_SIZE: size of the label space ND_CMD_GET_CONFIG_DATA: read label space ND_CMD_SET_CONFIG_DATA: write label space ND_CMD_VENDOR: vendor-specific command passthrough ND_CMD_ARS_CAP: report address-range-scrubbing capabilities ND_CMD_ARS_START: initiate scrubbing ND_CMD_ARS_STATUS: report on scrubbing state ND_CMD_SMART_THRESHOLD: configure alarm thresholds for smart events If a platform later defines different commands than this set it is straightforward to extend support to those formats. Most of the commands target a specific dimm. However, the address-range-scrubbing commands target the bus. The 'commands' attribute in sysfs of an nvdimm_bus, or nvdimm, enumerate the supported commands for that object. Cc: Cc: Robert Moore Cc: Rafael J. Wysocki Reported-by: Nicholas Moulin Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 07787f0dd7de..a39235819af3 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -14,13 +14,22 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ +#include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ NDD_ALIASING = 1 << 0, + + /* need to set a limit somewhere, but yes, this is likely overkill */ + ND_IOCTL_MAX_BUFLEN = SZ_4M, + ND_CMD_MAX_ELEM = 4, + ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_ARS_STATUS_MAX = SZ_4K, }; extern struct attribute_group nvdimm_bus_attribute_group; +extern struct attribute_group nvdimm_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -35,6 +44,14 @@ struct nvdimm_bus_descriptor { ndctl_fn ndctl; }; +struct nd_cmd_desc { + int in_num; + int out_num; + u32 in_sizes[ND_CMD_MAX_ELEM]; + int out_sizes[ND_CMD_MAX_ELEM]; +}; + +struct nvdimm_bus; struct device; struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); @@ -45,5 +62,13 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, - const struct attribute_group **groups, unsigned long flags); + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask); +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf); +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 4d88a97aa9e8cfa6460aab119c5da60ad2267423 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 14:41:48 -0400 Subject: libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure * Implement the device-model infrastructure for loading modules and attaching drivers to nvdimm devices. This is a simple association of a nd-device-type number with a driver that has a bitmask of supported device types. To facilitate userspace bind/unbind operations 'modalias' and 'devtype', that also appear in the uevent, are added as generic sysfs attributes for all nvdimm devices. The reason for the device-type number is to support sub-types within a given parent devtype, be it a vendor-specific sub-type or otherwise. * The first consumer of this infrastructure is the driver for dimm devices. It simply uses control messages to retrieve and store the configuration-data image (label set) from each dimm. Note: nd_device_register() arranges for asynchronous registration of nvdimm bus devices by default. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 ++ include/linux/nd.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/linux/nd.h (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index a39235819af3..d3ebccf4ea8b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -30,6 +30,7 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; +extern struct attribute_group nd_device_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -71,4 +72,5 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, const u32 *in_field, const u32 *out_field); +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); #endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/nd.h b/include/linux/nd.h new file mode 100644 index 000000000000..e074f67e53a3 --- /dev/null +++ b/include/linux/nd.h @@ -0,0 +1,39 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LINUX_ND_H__ +#define __LINUX_ND_H__ +#include +#include + +struct nd_device_driver { + struct device_driver drv; + unsigned long type; + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); +}; + +static inline struct nd_device_driver *to_nd_device_driver( + struct device_driver *drv) +{ + return container_of(drv, struct nd_device_driver, drv); +} + +#define MODULE_ALIAS_ND_DEVICE(type) \ + MODULE_ALIAS("nd:t" __stringify(type) "*") +#define ND_DEVICE_MODALIAS_FMT "nd:t%d" + +int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, + struct module *module, const char *mod_name); +#define nd_driver_register(driver) \ + __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#endif /* __LINUX_ND_H__ */ -- cgit v1.2.3 From 1f7df6f88b9245a7f2d0f8ecbc97dc88c8d0d8e1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Jun 2015 20:13:14 -0400 Subject: libnvdimm, nfit: regions (block-data-window, persistent memory, volatile memory) A "region" device represents the maximum capacity of a BLK range (mmio block-data-window(s)), or a PMEM range (DAX-capable persistent memory or volatile memory), without regard for aliasing. Aliasing, in the dimm-local address space (DPA), is resolved by metadata on a dimm to designate which exclusive interface will access the aliased DPA ranges. Support for the per-dimm metadata/label arrvies is in a subsequent patch. The name format of "region" devices is "regionN" where, like dimms, N is a global ida index assigned at discovery time. This id is not reliable across reboots nor in the presence of hotplug. Look to attributes of the region or static id-data of the sub-namespace to generate a persistent name. However, if the platform configuration does not change it is reasonable to expect the same region id to be assigned at the next boot. "region"s have 2 generic attributes "size", and "mapping"s where: - size: the BLK accessible capacity or the span of the system physical address range in the case of PMEM. - mappingN: a tuple describing a dimm's contribution to the region's capacity in the format (,,). For a PMEM-region there will be at least one mapping per dimm in the interleave set. For a BLK-region there is only "mapping0" listing the starting DPA of the BLK-region and the available DPA capacity of that space (matches "size" above). The max number of mappings per "region" is hard coded per the constraints of sysfs attribute groups. That said the number of mappings per region should never exceed the maximum number of possible dimms in the system. If the current number turns out to not be enough then the "mappings" attribute clarifies how many there are supposed to be. "32 should be enough for anybody...". Cc: Neil Brown Cc: Cc: Greg KH Cc: Robert Moore Cc: Rafael J. Wysocki Acked-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index d3ebccf4ea8b..39e7e606092a 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -26,11 +26,14 @@ enum { ND_CMD_MAX_ELEM = 4, ND_CMD_MAX_ENVELOPE = 16, ND_CMD_ARS_STATUS_MAX = SZ_4K, + ND_MAX_MAPPINGS = 32, }; extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; extern struct attribute_group nd_device_attribute_group; +extern struct attribute_group nd_region_attribute_group; +extern struct attribute_group nd_mapping_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -38,6 +41,12 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len); +struct nd_mapping { + struct nvdimm *nvdimm; + u64 start; + u64 size; +}; + struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long dsm_mask; @@ -52,6 +61,14 @@ struct nd_cmd_desc { int out_sizes[ND_CMD_MAX_ELEM]; }; +struct nd_region_desc { + struct resource *res; + struct nd_mapping *nd_mapping; + u16 num_mappings; + const struct attribute_group **attr_groups; + void *provider_data; +}; + struct nvdimm_bus; struct device; struct nvdimm_bus *nvdimm_bus_register(struct device *parent, @@ -59,9 +76,11 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); +struct nd_region *to_nd_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); +void *nd_region_provider_data(struct nd_region *nd_region); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long *dsm_mask); @@ -73,4 +92,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, const u32 *in_field, const u32 *out_field); int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 3d88002e4a7bd40f355550284c6cd140e6fe29dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 15:02:11 -0400 Subject: libnvdimm: support for legacy (non-aliasing) nvdimms The libnvdimm region driver is an intermediary driver that translates non-volatile "region"s into "namespace" sub-devices that are surfaced by persistent memory block-device drivers (PMEM and BLK). ACPI 6 introduces the concept that a given nvdimm may simultaneously offer multiple access modes to its media through direct PMEM load/store access, or windowed BLK mode. Existing nvdimms mostly implement a PMEM interface, some offer a BLK-like mode, but never both as ACPI 6 defines. If an nvdimm is single interfaced, then there is no need for dimm metadata labels. For these devices we can take the region boundaries directly to create a child namespace device (nd_namespace_io). Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 7 +++++-- include/linux/nd.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 39e7e606092a..37f966aff386 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,8 +71,11 @@ struct nd_region_desc { struct nvdimm_bus; struct device; -struct nvdimm_bus *nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nfit_desc); +struct module; +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc, struct module *module); +#define nvdimm_bus_register(parent, desc) \ + __nvdimm_bus_register(parent, desc, THIS_MODULE) void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); diff --git a/include/linux/nd.h b/include/linux/nd.h index e074f67e53a3..da70e9962197 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -26,6 +26,16 @@ static inline struct nd_device_driver *to_nd_device_driver( struct device_driver *drv) { return container_of(drv, struct nd_device_driver, drv); +}; + +struct nd_namespace_io { + struct device dev; + struct resource res; +}; + +static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +{ + return container_of(dev, struct nd_namespace_io, dev); } #define MODULE_ALIAS_ND_DEVICE(type) \ -- cgit v1.2.3 From eaf961536e1622ad21247ac8d44acd48ba65566e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 1 May 2015 13:11:27 -0400 Subject: libnvdimm, nfit: add interleave-set state-tracking infrastructure On platforms that have firmware support for reading/writing per-dimm label space, a portion of the dimm may be accessible via an interleave set PMEM mapping in addition to the dimm's BLK (block-data-window aperture(s)) interface. A label, stored in a "configuration data region" on the dimm, disambiguates which dimm addresses are accessed through which exclusive interface. Add infrastructure that allows the kernel to block modifications to a label in the set while any member dimm is active. Note that this is meant only for enforcing "no modifications of active labels" via the coarse ioctl command. Adding/deleting namespaces from an active interleave set is always possible via sysfs. Another aspect of tracking interleave sets is tracking their integrity when DIMMs in a set are physically re-ordered. For this purpose we generate an "interleave-set cookie" that can be recorded in a label and validated against the current configuration. It is the bus provider implementation's responsibility to calculate the interleave set cookie and attach it to a given region. Cc: Neil Brown Cc: Cc: Greg KH Cc: Robert Moore Cc: Rafael J. Wysocki Acked-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 37f966aff386..1b627b109360 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -61,11 +61,16 @@ struct nd_cmd_desc { int out_sizes[ND_CMD_MAX_ELEM]; }; +struct nd_interleave_set { + u64 cookie; +}; + struct nd_region_desc { struct resource *res; struct nd_mapping *nd_mapping; u16 num_mappings; const struct attribute_group **attr_groups; + struct nd_interleave_set *nd_set; void *provider_data; }; @@ -101,4 +106,5 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); +u64 nd_fletcher64(void *addr, size_t len, bool le); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From bf9bccc14c05dae8caba29df6187c731710f5380 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Jun 2015 17:14:46 -0400 Subject: libnvdimm: pmem label sets and namespace instantiation. A complete label set is a PMEM-label per-dimm per-interleave-set where all the UUIDs match and the interleave set cookie matches the hosting interleave set. Present sysfs attributes for manipulation of a PMEM-namespace's 'alt_name', 'uuid', and 'size' attributes. A later patch will make these settings persistent by writing back the label. Note that PMEM allocations grow forwards from the start of an interleave set (lowest dimm-physical-address (DPA)). BLK-namespaces that alias with a PMEM interleave set will grow allocations backward from the highest DPA. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 10 ++++++++++ include/linux/nd.h | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 1b627b109360..c130972e08c4 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,10 +41,20 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len); +struct nd_namespace_label; +struct nvdimm_drvdata; struct nd_mapping { struct nvdimm *nvdimm; + struct nd_namespace_label **labels; u64 start; u64 size; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. + */ + struct nvdimm_drvdata *ndd; }; struct nvdimm_bus_descriptor { diff --git a/include/linux/nd.h b/include/linux/nd.h index da70e9962197..255c38a83083 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -28,16 +28,40 @@ static inline struct nd_device_driver *to_nd_device_driver( return container_of(drv, struct nd_device_driver, drv); }; +/** + * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * @dev: namespace device created by the nd region driver + * @res: struct resource conversion of a NFIT SPA table + */ struct nd_namespace_io { struct device dev; struct resource res; }; +/** + * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory + * @nsio: device and system physical address range to drive + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + */ +struct nd_namespace_pmem { + struct nd_namespace_io nsio; + char *alt_name; + u8 *uuid; +}; + static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) { return container_of(dev, struct nd_namespace_io, dev); } +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return container_of(nsio, struct nd_namespace_pmem, nsio); +} + #define MODULE_ALIAS_ND_DEVICE(type) \ MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" -- cgit v1.2.3 From 1b40e09a1232de537b193fa1b6b3ef16d3a1e397 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 1 May 2015 13:34:01 -0400 Subject: libnvdimm: blk labels and namespace instantiation A blk label set describes a namespace comprised of one or more discontiguous dpa ranges on a single dimm. They may alias with one or more pmem interleave sets that include the given dimm. This is the runtime/volatile configuration infrastructure for sysfs manipulation of 'alt_name', 'uuid', 'size', and 'sector_size'. A later patch will make these settings persistent by writing back the label(s). Unlike pmem namespaces, multiple blk namespaces can be created per region. Once a blk namespace has been created a new seed device (unconfigured child of a parent blk region) is instantiated. As long as a region has 'available_size' != 0 new child namespaces may be created. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ include/linux/nd.h | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index c130972e08c4..a59dca17b3aa 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -27,6 +27,9 @@ enum { ND_CMD_MAX_ENVELOPE = 16, ND_CMD_ARS_STATUS_MAX = SZ_4K, ND_MAX_MAPPINGS = 32, + + /* mark newly adjusted resources as requiring a label update */ + DPA_RESOURCE_ADJUSTED = 1 << 0, }; extern struct attribute_group nvdimm_bus_attribute_group; diff --git a/include/linux/nd.h b/include/linux/nd.h index 255c38a83083..23276ea91690 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -50,6 +50,26 @@ struct nd_namespace_pmem { u8 *uuid; }; +/** + * struct nd_namespace_blk - namespace for dimm-bounded persistent memory + * @dev: namespace device creation by the nd region driver + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + * @id: ida allocated id + * @lbasize: blk namespaces have a native sector size when btt not present + * @num_resources: number of dpa extents to claim + * @res: discontiguous dpa extents for given dimm + */ +struct nd_namespace_blk { + struct device dev; + char *alt_name; + u8 *uuid; + int id; + unsigned long lbasize; + int num_resources; + struct resource **res; +}; + static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) { return container_of(dev, struct nd_namespace_io, dev); @@ -62,6 +82,11 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) return container_of(nsio, struct nd_namespace_pmem, nsio); } +static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev) +{ + return container_of(dev, struct nd_namespace_blk, dev); +} + #define MODULE_ALIAS_ND_DEVICE(type) \ MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" -- cgit v1.2.3 From d7f96f97c4031fa4ffdb7801f9aae23e96170a6f Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 25 Jun 2015 09:06:56 +0200 Subject: firmware: dmi_scan: add SBMIOS entry and DMI tables Some utils, like dmidecode and smbios, need to access SMBIOS entry table area in order to get information like SMBIOS version, size, etc. Currently it's done via /dev/mem. But for situation when /dev/mem usage is disabled, the utils have to use dmi sysfs instead, which doesn't represent SMBIOS entry and adds code/delay redundancy when direct access for table is needed. So this patch creates dmi/tables and adds SMBIOS entry point to allow utils in question to work correctly without /dev/mem. Also patch adds raw dmi table to simplify dmi table processing in user space, as proposed by Jean Delvare. Tested-by: Roy Franz Signed-off-by: Ivan Khoronzhuk Signed-off-by: Jean Delvare --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index f820f0a336c9..2f9f98827c0a 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -2,6 +2,7 @@ #define __DMI_H__ #include +#include #include /* enum dmi_field is in mod_devicetable.h */ @@ -93,6 +94,7 @@ struct dmi_dev_onboard { int devfn; }; +extern struct kobject *dmi_kobj; extern int dmi_check_system(const struct dmi_system_id *list); const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list); extern const char * dmi_get_system_info(int field); -- cgit v1.2.3 From 9ea650c804404e55dbf17c928203141282e759ab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 25 Jun 2015 09:06:57 +0200 Subject: firmware: dmi: struct dmi_header should be packed Apparently the compiler does fine without it, but it feels safer and clearer to add the missing attribute. Signed-off-by: Jean Delvare --- include/linux/dmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 2f9f98827c0a..5055ac34142d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -75,7 +75,7 @@ struct dmi_header { u8 type; u8 length; u16 handle; -}; +} __packed; struct dmi_device { struct list_head list; -- cgit v1.2.3 From 8c2f7e8658df1d3b7cbfa62706941d14c715823a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 25 Jun 2015 04:20:04 -0400 Subject: libnvdimm: infrastructure for btt devices NVDIMM namespaces, in addition to accepting "struct bio" based requests, also have the capability to perform byte-aligned accesses. By default only the bio/block interface is used. However, if another driver can make effective use of the byte-aligned capability it can claim namespace interface and use the byte-aligned ->rw_bytes() interface. The BTT driver is the initial first consumer of this mechanism to allow adding atomic sector update semantics to a pmem or blk namespace. This patch is the sysfs infrastructure to allow configuring a BTT instance for a namespace. Enabling that BTT and performing i/o is in a subsequent patch. Cc: Greg KH Cc: Neil Brown Signed-off-by: Dan Williams --- include/linux/nd.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index 23276ea91690..507e47c86737 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -12,6 +12,7 @@ */ #ifndef __LINUX_ND_H__ #define __LINUX_ND_H__ +#include #include #include @@ -28,13 +29,33 @@ static inline struct nd_device_driver *to_nd_device_driver( return container_of(drv, struct nd_device_driver, drv); }; +/** + * struct nd_namespace_common - core infrastructure of a namespace + * @force_raw: ignore other personalities for the namespace (e.g. btt) + * @dev: device model node + * @claim: when set a another personality has taken ownership of the namespace + * @rw_bytes: access the raw namespace capacity with byte-aligned transfers + */ +struct nd_namespace_common { + int force_raw; + struct device dev; + struct device *claim; + int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, + void *buf, size_t size, int rw); +}; + +static inline struct nd_namespace_common *to_ndns(struct device *dev) +{ + return container_of(dev, struct nd_namespace_common, dev); +} + /** * struct nd_namespace_io - infrastructure for loading an nd_pmem instance * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table */ struct nd_namespace_io { - struct device dev; + struct nd_namespace_common common; struct resource res; }; @@ -52,7 +73,6 @@ struct nd_namespace_pmem { /** * struct nd_namespace_blk - namespace for dimm-bounded persistent memory - * @dev: namespace device creation by the nd region driver * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label * @id: ida allocated id @@ -61,7 +81,7 @@ struct nd_namespace_pmem { * @res: discontiguous dpa extents for given dimm */ struct nd_namespace_blk { - struct device dev; + struct nd_namespace_common common; char *alt_name; u8 *uuid; int id; @@ -72,7 +92,7 @@ struct nd_namespace_blk { static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) { - return container_of(dev, struct nd_namespace_io, dev); + return container_of(dev, struct nd_namespace_io, common.dev); } static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) @@ -84,7 +104,40 @@ static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev) { - return container_of(dev, struct nd_namespace_blk, dev); + return container_of(dev, struct nd_namespace_blk, common.dev); +} + +/** + * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to fill + * @size: transfer length + * + * @buf is up-to-date upon return from this routine. + */ +static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, READ); +} + +/** + * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to drain + * @size: transfer length + * + * NVDIMM Namepaces disks do not implement sectors internally. Depending on + * the @ndns, the contents of @buf may be in cpu cache, platform buffers, + * or on backing memory media upon return from this routine. Flushing + * to media is handled internal to the @ndns driver, if at all. + */ +static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, WRITE); } #define MODULE_ALIAS_ND_DEVICE(type) \ -- cgit v1.2.3 From 144cba1493fdd6e3e1980e439a31df877831ebcd Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 11:09:54 +0800 Subject: libceph: allow setting osd_req_op's flags Signed-off-by: Yan, Zheng Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, -- cgit v1.2.3 From d50c97b566c5bbf990eff472e9feaa58fdebdd33 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 11:52:20 +0300 Subject: libceph: nuke time_sub() Unused since ceph got merged into mainline I guess. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..85ae9a889a3f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,15 +93,6 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) -{ - BUG_ON(time_after(b, a)); - return (long)a - (long)b; -} - struct ceph_mds_client; /* -- cgit v1.2.3 From a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 12:02:17 +0300 Subject: libceph: store timeouts in jiffies, verify user input There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc. There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there. Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 85ae9a889a3f..d73a569f9bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,6 +93,11 @@ enum { CEPH_MOUNT_SHUTDOWN, }; +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) +{ + return timeout ?: MAX_SCHEDULE_TIMEOUT; +} + struct ceph_mds_client; /* -- cgit v1.2.3 From f66fd9f0952187d274c13c136b74548f792c1925 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 17:26:13 +0800 Subject: ceph: pre-allocate data structure that tracks caps flushing Signed-off-by: Yan, Zheng --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d73a569f9bf5..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -174,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; -- cgit v1.2.3 From b459be739f97e2062b2ba77cfe8ea198dbd58904 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 13:21:07 +0300 Subject: crush: sync up with userspace .. up to ceph.git commit 1db1abc8328d ("crush: eliminate ad hoc diff between kernel and userspace"). This fixes a bunch of recently pulled coding style issues and makes includes a bit cleaner. A patch "crush:Make the function crush_ln static" from Nicholas Krause is folded in as crush_ln() has been made static in userspace as well. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 40 ++++++++++++++++++++++++++++++++++++++-- include/linux/crush/hash.h | 6 ++++++ include/linux/crush/mapper.h | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, -- cgit v1.2.3 From daf7ab7c58ac5f6304b63cca47a06cdc213361d7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 4 Jun 2015 12:13:11 +0800 Subject: genirq: Clean up outdated comments related to include/linux/irqdesc.h Seems we have little chance to move irqdesc.h from include/linux/ into kernel/irq/, so remove the outdated comments. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433391238-19471-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 1 - include/linux/irqdesc.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 812149160d3b..92188b0225bb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -407,7 +407,6 @@ enum { IRQCHIP_EOI_THREADED = (1 << 6), }; -/* This include will go away once we isolated irq_desc usage to core code */ #include /* diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c52d1480f272..66c41b40ca50 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -3,9 +3,6 @@ /* * Core internal functions to deal with irq descriptors - * - * This include will move to kernel/irq once we cleaned up the tree. - * For now it's included from */ struct irq_affinity_notify; -- cgit v1.2.3 From 08dad486d98e55d49b760cbaa20b71f35566d4e6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 4 Jun 2015 12:13:12 +0800 Subject: genirq: Remove irq_node() Macro irq_node() has no user. Remove it. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1433391238-19471-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irqnr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index fdd5cc16c9c4..9669bf9d4f48 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,12 +23,6 @@ unsigned int irq_get_next_irq(unsigned int offset); ; \ else -#ifdef CONFIG_SMP -#define irq_node(irq) (irq_get_irq_data(irq)->node) -#else -#define irq_node(irq) 0 -#endif - # define for_each_active_irq(irq) \ for (irq = irq_get_next_irq(0); irq < nr_irqs; \ irq = irq_get_next_irq(irq + 1)) -- cgit v1.2.3 From 479305fd7172503772575997eb6f1b0a2bb4a107 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 25 Jun 2015 15:00:40 -0700 Subject: zpool: remove zpool_evict() Remove zpool_evict() helper function. As zbud is currently the only zpool implementation that supports eviction, add zpool and zpool_ops references to struct zbud_pool and directly call zpool_ops->evict(zpool, handle) on eviction. Currently zpool provides the zpool_evict helper which locks the zpool list lock and searches through all pools to find the specific one matching the caller, and call the corresponding zpool_ops->evict function. However, this is unnecessary, as the zbud pool can simply keep a reference to the zpool that created it, as well as the zpool_ops, and directly call the zpool_ops->evict function, when it needs to evict a page. This avoids a spinlock and list search in zpool for each eviction. Signed-off-by: Dan Streetman Cc: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 56529b34dc63..d30eff3d84d5 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -81,7 +81,8 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops); + void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops, + struct zpool *zpool); void (*destroy)(void *pool); int (*malloc)(void *pool, size_t size, gfp_t gfp, @@ -102,6 +103,4 @@ void zpool_register_driver(struct zpool_driver *driver); int zpool_unregister_driver(struct zpool_driver *driver); -int zpool_evict(void *pool, unsigned long handle); - #endif -- cgit v1.2.3 From f6d133f877c8bb0a0934dc8c521c758ee771e901 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:00 -0700 Subject: compiler-gcc.h: neatening - Move the inline and noinline blocks together - Comment neatening - Alignment of __attribute__ uses - Consistent naming of __must_be_array macro argument - Multiline macro neatening Signed-off-by: Joe Perches Cc: Andi Kleen Cc: Michal Marek Cc: Segher Boessenkool Cc: Sasha Levin Cc: Anton Blanchard Cc: Alan Modra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 85 +++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 371e560d13cf..5c2c14e3c647 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -5,9 +5,9 @@ /* * Common definitions for all gcc versions go here. */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) /* Optimization barrier */ @@ -46,55 +46,63 @@ * the inline assembly constraint from =g to =r, in this particular * case either is valid. */ -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ - (typeof(ptr)) (__ptr + (off)); }) +#define RELOC_HIDE(ptr, off) \ +({ \ + unsigned long __ptr; \ + __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ + (typeof(ptr)) (__ptr + (off)); \ +}) /* Make the optimizer believe the variable can be manipulated arbitrarily. */ -#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var)) +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) #ifdef __CHECKER__ -#define __must_be_array(arr) 0 +#define __must_be_array(a) 0 #else /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old: */ -#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ +#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) notrace -# define __inline__ __inline__ __attribute__((always_inline)) notrace -# define __inline __inline __attribute__((always_inline)) notrace +#define inline inline __attribute__((always_inline)) notrace +#define __inline__ __inline__ __attribute__((always_inline)) notrace +#define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ -# define inline inline notrace -# define __inline__ __inline__ notrace -# define __inline __inline notrace +#define inline inline notrace +#define __inline__ __inline__ notrace +#define __inline __inline notrace #endif -#define __deprecated __attribute__((deprecated)) -#define __packed __attribute__((packed)) -#define __weak __attribute__((weak)) -#define __alias(symbol) __attribute__((alias(#symbol))) +#define __always_inline inline __attribute__((always_inline)) +#define noinline __attribute__((noinline)) + +#define __deprecated __attribute__((deprecated)) +#define __packed __attribute__((packed)) +#define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* - * it doesn't make sense on ARM (currently the only user of __naked) to trace - * naked functions because then mcount is called without stack and frame pointer - * being set up and there is no chance to restore the lr register to the value - * before mcount was called. + * it doesn't make sense on ARM (currently the only user of __naked) + * to trace naked functions because then mcount is called without + * stack and frame pointer being set up and there is no chance to + * restore the lr register to the value before mcount was called. + * + * The asm() bodies of naked functions often depend on standard calling + * conventions, therefore they must be noinline and noclone. * - * The asm() bodies of naked functions often depend on standard calling conventions, - * therefore they must be noinline and noclone. GCC 4.[56] currently fail to enforce - * this, so we must do so ourselves. See GCC PR44290. + * GCC 4.[56] currently fail to enforce this, so we must do so ourselves. + * See GCC PR44290. */ -#define __naked __attribute__((naked)) noinline __noclone notrace +#define __naked __attribute__((naked)) noinline __noclone notrace -#define __noreturn __attribute__((noreturn)) +#define __noreturn __attribute__((noreturn)) /* * From the GCC manual: @@ -106,14 +114,13 @@ * would be. * [...] */ -#define __pure __attribute__((pure)) -#define __aligned(x) __attribute__((aligned(x))) -#define __printf(a, b) __attribute__((format(printf, a, b))) -#define __scanf(a, b) __attribute__((format(scanf, a, b))) -#define noinline __attribute__((noinline)) -#define __attribute_const__ __attribute__((__const__)) -#define __maybe_unused __attribute__((unused)) -#define __always_unused __attribute__((unused)) +#define __pure __attribute__((pure)) +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) +#define __attribute_const__ __attribute__((__const__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) @@ -129,5 +136,3 @@ * code */ #define uninitialized_var(x) x = x - -#define __always_inline inline __attribute__((always_inline)) -- cgit v1.2.3 From cb984d101b30eb7478d32df56a0023e4603cba7f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:02 -0700 Subject: compiler-gcc: integrate the various compiler-gcc[345].h files As gcc major version numbers are going to advance rather rapidly in the future, there's no real value in separate files for each compiler version. Deduplicate some of the macros #defined in each file too. Neaten comments using normal kernel commenting style. Signed-off-by: Joe Perches Cc: Andi Kleen Cc: Michal Marek Cc: Segher Boessenkool Cc: Sasha Levin Cc: Anton Blanchard Cc: Alan Modra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 120 ++++++++++++++++++++++++++++++++++++++++-- include/linux/compiler-gcc3.h | 23 -------- include/linux/compiler-gcc4.h | 91 -------------------------------- include/linux/compiler-gcc5.h | 67 ----------------------- 4 files changed, 116 insertions(+), 185 deletions(-) delete mode 100644 include/linux/compiler-gcc3.h delete mode 100644 include/linux/compiler-gcc4.h delete mode 100644 include/linux/compiler-gcc5.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5c2c14e3c647..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -122,10 +122,122 @@ #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) -#define __gcc_header(x) #x -#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) -#define gcc_header(x) _gcc_header(x) -#include gcc_header(__GNUC__) +/* gcc version specific checks */ + +#if GCC_VERSION < 30200 +# error Sorry, your compiler is too old - please upgrade it. +#endif + +#if GCC_VERSION < 30300 +# define __used __attribute__((__unused__)) +#else +# define __used __attribute__((__used__)) +#endif + +#ifdef CONFIG_GCOV_KERNEL +# if GCC_VERSION < 30400 +# error "GCOV profiling support for gcc versions below 3.4 not included" +# endif /* __GNUC_MINOR__ */ +#endif /* CONFIG_GCOV_KERNEL */ + +#if GCC_VERSION >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#endif + +#if GCC_VERSION >= 40000 + +/* GCC 4.1.[01] miscompiles __weak */ +#ifdef __KERNEL__ +# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 +# error Your version of gcc miscompiles the __weak directive +# endif +#endif + +#define __used __attribute__((__used__)) +#define __compiler_offsetof(a, b) \ + __builtin_offsetof(a, b) + +#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 +# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif + +#if GCC_VERSION >= 40300 +/* Mark functions as cold. gcc will assume any path leading to a call + * to them will be unlikely. This means a lot of manual unlikely()s + * are unnecessary now for any paths leading to the usual suspects + * like BUG(), printk(), panic() etc. [but let's keep them for now for + * older compilers] + * + * Early snapshots of gcc 4.3 don't support this and we can't detect this + * in the preprocessor, but we can live with this because they're unreleased. + * Maketime probing would be overkill here. + * + * gcc also has a __attribute__((__hot__)) to move hot functions into + * a special section, but I don't see any sense in this right now in + * the kernel context + */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ +#endif /* GCC_VERSION >= 40300 */ + +#if GCC_VERSION >= 40500 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +#endif /* GCC_VERSION >= 40500 */ + +#if GCC_VERSION >= 40600 +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) +#endif + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if GCC_VERSION >= 40400 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 50000 +#define KASAN_ABI_VERSION 4 +#elif GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif + +#endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) #define __noclone /* not needed */ diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h deleted file mode 100644 index 7d89febe4d79..000000000000 --- a/include/linux/compiler-gcc3.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -#if GCC_VERSION < 30200 -# error Sorry, your compiler is too old - please upgrade it. -#endif - -#if GCC_VERSION >= 30300 -# define __used __attribute__((__used__)) -#else -# define __used __attribute__((__unused__)) -#endif - -#if GCC_VERSION >= 30400 -#define __must_check __attribute__((warn_unused_result)) -#endif - -#ifdef CONFIG_GCOV_KERNEL -# if GCC_VERSION < 30400 -# error "GCOV profiling support for gcc versions below 3.4 not included" -# endif /* __GNUC_MINOR__ */ -#endif /* CONFIG_GCOV_KERNEL */ diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h deleted file mode 100644 index 769e19864632..000000000000 --- a/include/linux/compiler-gcc4.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -/* GCC 4.1.[01] miscompiles __weak */ -#ifdef __KERNEL__ -# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 -# error Your version of gcc miscompiles the __weak directive -# endif -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a,b) __builtin_offsetof(a,b) - -#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 -# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#endif - -#if GCC_VERSION >= 40300 -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. [but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ -#endif /* GCC_VERSION >= 40300 */ - -#if GCC_VERSION >= 40500 -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -#endif /* GCC_VERSION >= 40500 */ - -#if GCC_VERSION >= 40600 -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) -#endif - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#if GCC_VERSION >= 40400 -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#endif -#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) -#define __HAVE_BUILTIN_BSWAP16__ -#endif -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#if GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 -#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h deleted file mode 100644 index efee493714eb..000000000000 --- a/include/linux/compiler-gcc5.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. [but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ - -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#define __HAVE_BUILTIN_BSWAP16__ -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#define KASAN_ABI_VERSION 4 -- cgit v1.2.3 From b86a50c3b5414eafdbee7f34af4a201a4a7817c2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 Jun 2015 15:01:05 -0700 Subject: compiler-intel: fix wrong compiler barrier() macro Cleanup commit 73679e508201 ("compiler-intel.h: Remove duplicate definition") removed the double definition of __memory_barrier() intrinsics. However, in doing so, it also removed the preceding #undef barrier by accident, meaning, the actual barrier() macro from compiler-gcc.h with inline asm is still in place as __GNUC__ is provided. Subsequently, barrier() can never be defined as __memory_barrier() from compiler.h since it already has a definition in place and if we trust the comment in compiler-intel.h, ecc doesn't support gcc specific asm statements. I don't have an ecc at hand (unsure if that's still used in the field?) and only found this by accident during code review, a revert of that cleanup would be simplest option. Fixes: 73679e508201 ("compiler-intel.h: Remove duplicate definition") Signed-off-by: Daniel Borkmann Reviewed-by: Pranith Kumar Cc: Pranith Kumar Cc: H. Peter Anvin Cc: mancha security Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-intel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 0c9a2f2c2802..d4c71132d07f 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,10 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier #undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier() __memory_barrier() #define barrier_data(ptr) barrier() #define RELOC_HIDE(ptr, off) \ -- cgit v1.2.3 From 8c7fbe5795a016259445a61e072eb0118aaf6a61 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:16 -0700 Subject: stddef.h: move offsetofend inside #ifndef/#endif guard, neaten Commit 3876488444e7 ("include/stddef.h: Move offsetofend() from vfio.h to a generic kernel header") added offsetofend outside the normal include #ifndef/#endif guard. Move it inside. Miscellanea: o remove unnecessary blank line o standardize offsetof macros whitespace style Signed-off-by: Joe Perches Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stddef.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 076af437284d..9c61c7cda936 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -3,7 +3,6 @@ #include - #undef NULL #define NULL ((void *)0) @@ -14,10 +13,9 @@ enum { #undef offsetof #ifdef __compiler_offsetof -#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) +#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) #else -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) #endif /** @@ -28,3 +26,5 @@ enum { */ #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + +#endif -- cgit v1.2.3 From 3033f14ab78c326871a4902591c2518410add24a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 25 Jun 2015 15:01:19 -0700 Subject: clone: support passing tls argument via C rather than pt_regs magic clone has some of the quirkiest syscall handling in the kernel, with a pile of special cases, historical curiosities, and architecture-specific calling conventions. In particular, clone with CLONE_SETTLS accepts a parameter "tls" that the C entry point completely ignores and some assembly entry points overwrite; instead, the low-level arch-specific code pulls the tls parameter out of the arch-specific register captured as part of pt_regs on entry to the kernel. That's a massive hack, and it makes the arch-specific code only work when called via the specific existing syscall entry points; because of this hack, any new clone-like system call would have to accept an identical tls argument in exactly the same arch-specific position, rather than providing a unified system call entry point across architectures. The first patch allows architectures to handle the tls argument via normal C parameter passing, if they opt in by selecting HAVE_COPY_THREAD_TLS. The second patch makes 32-bit and 64-bit x86 opt into this. These two patches came out of the clone4 series, which isn't ready for this merge window, but these first two cleanup patches were entirely uncontroversial and have acks. I'd like to go ahead and submit these two so that other architectures can begin building on top of this and opting into HAVE_COPY_THREAD_TLS. However, I'm also happy to wait and send these through the next merge window (along with v3 of clone4) if anyone would prefer that. This patch (of 2): clone with CLONE_SETTLS accepts an argument to set the thread-local storage area for the new thread. sys_clone declares an int argument tls_val in the appropriate point in the argument list (based on the various CLONE_BACKWARDS variants), but doesn't actually use or pass along that argument. Instead, sys_clone calls do_fork, which calls copy_process, which calls the arch-specific copy_thread, and copy_thread pulls the corresponding syscall argument out of the pt_regs captured at kernel entry (knowing what argument of clone that architecture passes tls in). Apart from being awful and inscrutable, that also only works because only one code path into copy_thread can pass the CLONE_SETTLS flag, and that code path comes from sys_clone with its architecture-specific argument-passing order. This prevents introducing a new version of the clone system call without propagating the same architecture-specific position of the tls argument. However, there's no reason to pull the argument out of pt_regs when sys_clone could just pass it down via C function call arguments. Introduce a new CONFIG_HAVE_COPY_THREAD_TLS for architectures to opt into, and a new copy_thread_tls that accepts the tls parameter as an additional unsigned long (syscall-argument-sized) argument. Change sys_clone's tls argument to an unsigned long (which does not change the ABI), and pass that down to copy_thread_tls. Architectures that don't opt into copy_thread_tls will continue to ignore the C argument to sys_clone in favor of the pt_regs captured at kernel entry, and thus will be unable to introduce new versions of the clone syscall. Patch co-authored by Josh Triplett and Thiago Macieira. Signed-off-by: Josh Triplett Acked-by: Andy Lutomirski Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thiago Macieira Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 15 +++++++++++++++ include/linux/syscalls.h | 6 +++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83e608a..93ed0b682adb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2556,8 +2556,22 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. */ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif extern void flush_thread(void); extern void exit_thread(void); @@ -2576,6 +2590,7 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38aabe1..bb51becf23f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -827,15 +827,15 @@ asmlinkage long sys_syncfs(int fd); asmlinkage long sys_fork(void); asmlinkage long sys_vfork(void); #ifdef CONFIG_CLONE_BACKWARDS -asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, +asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); #else #ifdef CONFIG_CLONE_BACKWARDS3 asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, - int __user *, int); + int __user *, unsigned long); #else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, - int __user *, int); + int __user *, unsigned long); #endif #endif -- cgit v1.2.3 From d43ff430f434d862db59582c0f1f02382a678036 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:24 -0700 Subject: printk: guard the amount written per line by devkmsg_read() This patchset updates netconsole so that it can emit messages with the same header as used in /dev/kmsg which gives neconsole receiver full log information which enables things like structured logging and detection of lost messages. This patch (of 7): devkmsg_read() uses 8k buffer and assumes that the formatted output message won't overrun which seems safe given LOG_LINE_MAX, the current use of dict and the escaping method being used; however, we're planning to use devkmsg formatting wider and accounting for the buffer size properly isn't that complicated. This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates devkmsg_read() so that it limits output accordingly. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Reviewed-by: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 9b30871c9149..58b1fec40d37 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +#define CONSOLE_EXT_LOG_MAX 8192 + /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT -- cgit v1.2.3 From 6fe29354befe4c46eb308b662155d4d8017358e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:30 -0700 Subject: printk: implement support for extended console drivers printk log_buf keeps various metadata for each message including its sequence number and timestamp. The metadata is currently available only through /dev/kmsg and stripped out before passed onto console drivers. We want this metadata to be available to console drivers too so that console consumers can get full information including the metadata and dictionary, which among other things can be used to detect whether messages got lost in transit. This patch implements support for extended console drivers. Consoles can indicate that they want extended messages by setting the new CON_EXTENDED flag and they'll be fed messages formatted the same way as /dev/kmsg. ",,,;\n" If extended consoles exist, in-kernel fragment assembly is disabled. This ensures that all messages emitted to consoles have full metadata including sequence number. The contflag carries enough information to reassemble the fragments from the reader side trivially. Note that this only affects /dev/kmsg. Regular console and /proc/kmsg outputs are not affected by this change. * Extended message formatting for console drivers is enabled iff there are registered extended consoles. * Comment describing /dev/kmsg message format updated to add missing contflag field and help distinguishing variable from verbatim terms. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Reviewed-by: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/console.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9f50fb413c11..bd194343c346 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -115,6 +115,7 @@ static inline int con_debug_leave(void) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ struct console { char name[16]; -- cgit v1.2.3 From 3ea4331c60be3eee4c97e5ddabad95399f879b76 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 25 Jun 2015 15:01:47 -0700 Subject: check_syslog_permissions() cleanup Patch fixes drawbacks in heck_syslog_permissions() noticed by AKPM: "from_file handling makes me cry. That's not a boolean - it's an enumerated value with two values currently defined. But the code in check_syslog_permissions() treats it as a boolean and also hardwires the knowledge that SYSLOG_FROM_PROC == 1 (or == `true`). And the name is wrong: it should be called from_proc to match SYSLOG_FROM_PROC." Signed-off-by: Vasily Averin Cc: Kees Cook Cc: Josh Boyer Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syslog.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 4b7b875a7ce1..c3a7f0cc3a27 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -47,12 +47,12 @@ #define SYSLOG_FROM_READER 0 #define SYSLOG_FROM_PROC 1 -int do_syslog(int type, char __user *buf, int count, bool from_file); +int do_syslog(int type, char __user *buf, int count, int source); #ifdef CONFIG_PRINTK -int check_syslog_permissions(int type, bool from_file); +int check_syslog_permissions(int type, int source); #else -static inline int check_syslog_permissions(int type, bool from_file) +static inline int check_syslog_permissions(int type, int source) { return 0; } -- cgit v1.2.3 From 94df290404cd0da8016698bf3f398410f29d9a64 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:22 -0700 Subject: lib/string.c: introduce strreplace() Strings are sometimes sanitized by replacing a certain character (often '/') by another (often '!'). In a few places, this is done the same way Schlemiel the Painter would do it. Others are slightly smarter but still do multiple strchr() calls. Introduce strreplace() to do this using a single function call and a single pass over the string. One would expect the return value to be one of three things: void, s, or the number of replacements made. I chose the fourth, returning a pointer to the end of the string. This is more likely to be useful (for example allowing the caller to avoid a strlen call). Signed-off-by: Rasmus Villemoes Cc: "Theodore Ts'o" Cc: Greg Kroah-Hartman Cc: Neil Brown Cc: Steven Rostedt Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index e40099e585c9..a8d90db9c4b0 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -111,6 +111,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +char *strreplace(char *s, char old, char new); extern void kfree_const(const void *x); -- cgit v1.2.3 From 78d8e58a086b214dddf1fd463e20a7e1d82d7866 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 26 Jun 2015 10:01:13 -0400 Subject: Revert "block, dm: don't copy bios for request clones" This reverts commit 5f1b670d0bef508a5554d92525f5f6d00d640b38. Justification for revert as reported in this dm-devel post: https://www.redhat.com/archives/dm-devel/2015-June/msg00160.html this change should not be pushed to mainline yet. Firstly, Christoph has a newer version of the patch that fixes silent data corruption problem: https://www.redhat.com/archives/dm-devel/2015-May/msg00229.html And the new version still depends on LLDDs to always complete requests to the end when error happens, while block API doesn't enforce such a requirement. If the assumption is ever broken, the inconsistency between request and bio (e.g. rq->__sector and rq->bio) will cause silent data corruption: https://www.redhat.com/archives/dm-devel/2015-June/msg00022.html Reported-by: Junichi Nomura Signed-off-by: Mike Snitzer --- include/linux/blk_types.h | 2 -- include/linux/blkdev.h | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6ab9d12d1f17..7303b3405520 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -192,7 +192,6 @@ enum rq_flag_bits { __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NO_TIMEOUT, /* requests may never expire */ - __REQ_CLONE, /* cloned bios */ __REQ_NR_BITS, /* stops here */ }; @@ -247,6 +246,5 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) -#define REQ_CLONE (1ULL << __REQ_CLONE) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 776d2ee43ba6..41c0fb573dff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -775,7 +775,11 @@ extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); -extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); +extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data); +extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_delay_queue(struct request_queue *, unsigned long); -- cgit v1.2.3 From 5212e11fde4d40fa627668b4f2222d20db488f71 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 25 Jun 2015 04:20:32 -0400 Subject: nd_btt: atomic sector updates BTT stands for Block Translation Table, and is a way to provide power fail sector atomicity semantics for block devices that have the ability to perform byte granularity IO. It relies on the capability of libnvdimm namespace devices to do byte aligned IO. The BTT works as a stacked blocked device, and reserves a chunk of space from the backing device for its accounting metadata. It is a bio-based driver because all IO is done synchronously, and there is no queuing or asynchronous completions at either the device or the driver level. The BTT uses 'lanes' to index into various 'on-disk' data structures, and lanes also act as a synchronization mechanism in case there are more CPUs than available lanes. We did a comparison between two lane lock strategies - first where we kept an atomic counter around that tracked which was the last lane that was used, and 'our' lane was determined by atomically incrementing that. That way, for the nr_cpus > nr_lanes case, theoretically, no CPU would be blocked waiting for a lane. The other strategy was to use the cpu number we're scheduled on to and hash it to a lane number. Theoretically, this could block an IO that could've otherwise run using a different, free lane. But some fio workloads showed that the direct cpu -> lane hash performed faster than tracking 'last lane' - my reasoning is the cache thrash caused by moving the atomic variable made that approach slower than simply waiting out the in-progress IO. This supports the conclusion that the driver can be a very simple bio-based one that does synchronous IOs instead of queuing. Cc: Andy Lutomirski Cc: Boaz Harrosh Cc: H. Peter Anvin Cc: Jens Axboe Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Neil Brown Cc: Jeff Moyer Cc: Dave Chinner Cc: Greg KH [jmoyer: fix nmi watchdog timeout in btt_map_init] [jmoyer: move btt initialization to module load path] [jmoyer: fix memory leak in the btt initialization path] [jmoyer: Don't overwrite corrupted arenas] Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index a59dca17b3aa..531d99dfac68 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -85,6 +85,7 @@ struct nd_region_desc { const struct attribute_group **attr_groups; struct nd_interleave_set *nd_set; void *provider_data; + int num_lanes; }; struct nvdimm_bus; -- cgit v1.2.3 From 047fc8a1f9a6330eacc80374dff087e20dc2304b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 25 Jun 2015 04:21:02 -0400 Subject: libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory The libnvdimm implementation handles allocating dimm address space (DPA) between PMEM and BLK mode interfaces. After DPA has been allocated from a BLK-region to a BLK-namespace the nd_blk driver attaches to handle I/O as a struct bio based block device. Unlike PMEM, BLK is required to handle platform specific details like mmio register formats and memory controller interleave. For this reason the libnvdimm generic nd_blk driver calls back into the bus provider to carry out the I/O. This initial implementation handles the BLK interface defined by the ACPI 6 NFIT [1] and the NVDIMM DSM Interface Example [2] composed from DCR (dimm control region), BDW (block data window), IDT (interleave descriptor) NFIT structures and the hardware register format. [1]: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf [2]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Cc: Andy Lutomirski Cc: Boaz Harrosh Cc: H. Peter Anvin Cc: Jens Axboe Cc: Ingo Molnar Cc: Christoph Hellwig Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 531d99dfac68..7fc1b25bdb5d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -14,6 +14,7 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ +#include #include #include @@ -89,8 +90,24 @@ struct nd_region_desc { }; struct nvdimm_bus; -struct device; struct module; +struct device; +struct nd_blk_region; +struct nd_blk_region_desc { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + struct nd_region_desc ndr_desc; +}; + +static inline struct nd_blk_region_desc *to_blk_region_desc( + struct nd_region_desc *ndr_desc) +{ + return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc); + +} + struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc, struct module *module); #define nvdimm_bus_register(parent, desc) \ @@ -99,10 +116,10 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); +struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); -void *nd_region_provider_data(struct nd_region *nd_region); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long *dsm_mask); @@ -120,5 +137,11 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); +void *nd_region_provider_data(struct nd_region *nd_region); +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned int nd_region_acquire_lane(struct nd_region *nd_region); +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 581388209405902b56d055f644b4dd124a206112 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 23 Jun 2015 20:08:34 -0400 Subject: libnvdimm, nfit: handle unarmed dimms, mark namespaces read-only Upon detection of an unarmed dimm in a region, arrange for descendant BTT, PMEM, or BLK instances to be read-only. A dimm is primarily marked "unarmed" via flags passed by platform firmware (NFIT). The flags in the NFIT memory device sub-structure indicate the state of the data on the nvdimm relative to its energy source or last "flush to persistence". For the most part there is nothing the driver can do but advertise the state of these flags in sysfs and emit a message if firmware indicates that the contents of the device may be corrupted. However, for the case of ACPI_NFIT_MEM_ARMED, the driver can arrange for the block devices incorporating that nvdimm to be marked read-only. This is a safe default as the data is still available and new writes are held off until the administrator either forces read-write mode, or the energy source becomes armed. A 'read_only' attribute is added to REGION devices to allow for overriding the default read-only policy of all descendant block devices. Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 7fc1b25bdb5d..dc799a29ed1a 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -21,6 +21,8 @@ enum { /* when a dimm supports both PMEM and BLK access a label is required */ NDD_ALIASING = 1 << 0, + /* unarmed memory devices may not persist writes */ + NDD_UNARMED = 1 << 1, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit v1.2.3 From 99759869faf15471cfce251bc138848d8af7d162 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 19 Jun 2015 17:14:15 -0600 Subject: acpi: Add acpi_map_pxm_to_online_node() The kernel initializes CPU & memory's NUMA topology from ACPI SRAT table. Some other ACPI tables, such as NFIT and DMAR, also contain proximity IDs for their device's NUMA topology. This information can be used to improve performance of these devices. This patch introduces acpi_map_pxm_to_online_node(), which is similar to acpi_map_pxm_to_node(), but always returns an online node. When the mapped node from a given proximity ID is offline, it looks up the node distance table and returns the nearest online node. ACPI device drivers, which are called after the NUMA initialization has completed in the kernel, can call this interface to obtain their device NUMA topology from ACPI tables. Such drivers do not have to deal with offline nodes. A node may be offline when a device proximity ID is unique, SRAT memory entry does not exist, or NUMA is disabled, ex. "numa=off" on x86. This patch also moves the pxm range check from acpi_get_node() to acpi_map_pxm_to_node(). Signed-off-by: Toshi Kani Acked-by: Rafael J. Wysocki > Signed-off-by: Dan Williams --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e4da5e35e29c..1b3bbb11d11c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -289,8 +289,13 @@ extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); extern void acpi_osi_setup(char *str); #ifdef CONFIG_ACPI_NUMA +int acpi_map_pxm_to_online_node(int pxm); int acpi_get_node(acpi_handle handle); #else +static inline int acpi_map_pxm_to_online_node(int pxm) +{ + return 0; +} static inline int acpi_get_node(acpi_handle handle) { return 0; -- cgit v1.2.3 From 41d7a6d637e1440f5410cb43c25a3c41255540c5 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 19 Jun 2015 12:18:33 -0600 Subject: libnvdimm: Set numa_node to NVDIMM devices ACPI NFIT table has System Physical Address Range Structure entries that describe a proximity ID of each range when ACPI_NFIT_PROXIMITY_VALID is set in the flags. Change acpi_nfit_register_region() to map a proximity ID to its node ID, and set it to a new numa_node field of nd_region_desc, which is then conveyed to the nd_region device. The device core arranges for btt and namespace devices to inherit their node from their parent region. Signed-off-by: Toshi Kani [djbw: move set_dev_node() from region.c to bus.c] Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index dc799a29ed1a..30b3deaafd51 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -89,6 +89,7 @@ struct nd_region_desc { struct nd_interleave_set *nd_set; void *provider_data; int num_lanes; + int numa_node; }; struct nvdimm_bus; -- cgit v1.2.3 From 74ae66c3b14ffa94c8d2dea201cdf8e6203d13d5 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 19 Jun 2015 12:18:34 -0600 Subject: libnvdimm: Add sysfs numa_node to NVDIMM devices Add support of sysfs 'numa_node' to I/O-related NVDIMM devices under /sys/bus/nd/devices, regionN, namespaceN.0, and bttN.x. An example of numa_node values on a 2-socket system with a single NVDIMM range on each socket is shown below. /sys/bus/nd/devices |-- btt0.0/numa_node:0 |-- btt1.0/numa_node:1 |-- btt1.1/numa_node:1 |-- namespace0.0/numa_node:0 |-- namespace1.0/numa_node:1 |-- region0/numa_node:0 |-- region1/numa_node:1 These numa_node files are then linked under the block class of their device names. /sys/class/block/pmem0/device/numa_node:0 /sys/class/block/pmem1s/device/numa_node:1 This enables numactl(8) to accept 'block:' and 'file:' paths of pmem and btt devices as shown in the examples below. numactl --preferred block:pmem0 --show numactl --preferred file:/dev/pmem1s --show Signed-off-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 30b3deaafd51..75e3af01ee32 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -38,6 +38,7 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; extern struct attribute_group nd_device_attribute_group; +extern struct attribute_group nd_numa_attribute_group; extern struct attribute_group nd_region_attribute_group; extern struct attribute_group nd_mapping_attribute_group; -- cgit v1.2.3 From 61031952f4c89dba1065f7a5b9419badb112554c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 25 Jun 2015 03:08:39 -0400 Subject: arch, x86: pmem api for ensuring durability of persistent memory updates Based on an original patch by Ross Zwisler [1]. Writes to persistent memory have the potential to be posted to cpu cache, cpu write buffers, and platform write buffers (memory controller) before being committed to persistent media. Provide apis, memcpy_to_pmem(), wmb_pmem(), and memremap_pmem(), to write data to pmem and assert that it is durable in PMEM (a persistent linear address range). A '__pmem' attribute is added so sparse can track proper usage of pointers to pmem. This continues the status quo of pmem being x86 only for 4.2, but reworks to ioremap, and wider implementation of memremap() will enable other archs in 4.3. [1]: https://lists.01.org/pipermail/linux-nvdimm/2015-May/000932.html Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Ross Zwisler [djbw: various reworks] Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 + include/linux/pmem.h | 153 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 include/linux/pmem.h (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..9a528d945498 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -21,6 +21,7 @@ # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu +# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); @@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __cond_lock(x,c) (c) # define __percpu # define __rcu +# define __pmem #endif /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ diff --git a/include/linux/pmem.h b/include/linux/pmem.h new file mode 100644 index 000000000000..f6481a0b1d4f --- /dev/null +++ b/include/linux/pmem.h @@ -0,0 +1,153 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __PMEM_H__ +#define __PMEM_H__ + +#include + +#ifdef CONFIG_ARCH_HAS_PMEM_API +#include +#else +static inline void arch_wmb_pmem(void) +{ + BUG(); +} + +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} + +static inline void __pmem *arch_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return NULL; +} + +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + BUG(); +} +#endif + +/* + * Architectures that define ARCH_HAS_PMEM_API must provide + * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(), + * arch_wmb_pmem(), and __arch_has_wmb_pmem(). + */ + +static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +{ + memcpy(dst, (void __force const *) src, size); +} + +static inline void memunmap_pmem(void __pmem *addr) +{ + iounmap((void __force __iomem *) addr); +} + +/** + * arch_has_wmb_pmem - true if wmb_pmem() ensures durability + * + * For a given cpu implementation within an architecture it is possible + * that wmb_pmem() resolves to a nop. In the case this returns + * false, pmem api users are unable to ensure durability and may want to + * fall back to a different data consistency model, or otherwise notify + * the user. + */ +static inline bool arch_has_wmb_pmem(void) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) + return __arch_has_wmb_pmem(); + return false; +} + +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); +} + +/* + * These defaults seek to offer decent performance and minimize the + * window between i/o completion and writes being durable on media. + * However, it is undefined / architecture specific whether + * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * making data durable relative to i/o completion. + */ +static void default_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t size) +{ + memcpy((void __force *) dst, src, size); +} + +static void __pmem *default_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + /* TODO: convert to ioremap_wt() */ + return (void __pmem __force *)ioremap_nocache(offset, size); +} + +/** + * memremap_pmem - map physical persistent memory for pmem api + * @offset: physical address of persistent memory + * @size: size of the mapping + * + * Establish a mapping of the architecture specific memory type expected + * by memcpy_to_pmem() and wmb_pmem(). For example, it may be + * the case that an uncacheable or writethrough mapping is sufficient, + * or a writeback mapping provided memcpy_to_pmem() and + * wmb_pmem() arrange for the data to be written through the + * cache to persistent media. + */ +static inline void __pmem *memremap_pmem(resource_size_t offset, + unsigned long size) +{ + if (arch_has_pmem_api()) + return arch_memremap_pmem(offset, size); + return default_memremap_pmem(offset, size); +} + +/** + * memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Perform a memory copy that results in the destination of the copy + * being effectively evicted from, or never written to, the processor + * cache hierarchy after the copy completes. After memcpy_to_pmem() + * data may still reside in cpu or platform buffers, so this operation + * must be followed by a wmb_pmem(). + */ +static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) +{ + if (arch_has_pmem_api()) + arch_memcpy_to_pmem(dst, src, n); + else + default_memcpy_to_pmem(dst, src, n); +} + +/** + * wmb_pmem - synchronize writes to persistent memory + * + * After a series of memcpy_to_pmem() operations this drains data from + * cpu write buffers and any platform (memory controller) buffers to + * ensure that written data is durable on persistent memory media. + */ +static inline void wmb_pmem(void) +{ + if (arch_has_pmem_api()) + arch_wmb_pmem(); +} +#endif /* __PMEM_H__ */ -- cgit v1.2.3 From 304adf8a8fff972f633bf52b3d160682d3f3d5d2 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 4 Jun 2015 12:13:26 +0800 Subject: genirq: Introduce helper irq_desc_get_irq() Introduce helper irq_desc_get_irq() to retrieve the irq number from the irq descriptor. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433391238-19471-17-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 66c41b40ca50..a72bfd94ea10 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -100,6 +100,11 @@ static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) #endif } +static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) +{ + return desc->irq_data.irq; +} + static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; -- cgit v1.2.3 From bbc9d21fc0071c245c19077155ea371092ff0db8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 23 Jun 2015 15:01:30 +0200 Subject: genirq: Implement irq_set_handler_locked()/irq_set_chip_handler_name_locked() The main use case for the exisiting __irq_set_*_locked() inlines is to replace the handler [,chip and name] of an interrupt from a region which has the irq descriptor lock held, e.g. from the irq_set_type() callback. The first argument is the irq number, so the functions need so perform a pointless lookup of the interrupt descriptor for those cases which have the irq_data pointer handy. Provide new functions which take an irq_data pointer instead of the interrupt number, so the lookup of the interrupt descriptor can be avoided. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Conflicts: include/linux/irqdesc.h --- include/linux/irqdesc.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a72bfd94ea10..624a668e61f1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -190,6 +190,47 @@ __irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip, desc->name = name; } +/** + * irq_set_handler_locked - Set irq handler from a locked region + * @data: Pointer to the irq_data structure which identifies the irq + * @handler: Flow control handler function for this interrupt + * + * Sets the handler in the irq descriptor associated to @data. + * + * Must be called with irq_desc locked and valid parameters. Typical + * call site is the irq_set_type() callback. + */ +static inline void irq_set_handler_locked(struct irq_data *data, + irq_flow_handler_t handler) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + desc->handle_irq = handler; +} + +/** + * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region + * @data: Pointer to the irq_data structure for which the chip is set + * @chip: Pointer to the new irq chip + * @handler: Flow control handler function for this interrupt + * @name: Name of the interrupt + * + * Replace the irq chip at the proper hierarchy level in @data and + * sets the handler and name in the associated irq descriptor. + * + * Must be called with irq_desc locked and valid parameters. + */ +static inline void +irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, + irq_flow_handler_t handler, const char *name) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + desc->handle_irq = handler; + desc->name = name; + data->chip = chip; +} + static inline int irq_balancing_disabled(unsigned int irq) { struct irq_desc *desc; -- cgit v1.2.3 From 6c5a0d891543873aefc3aaf846c1e7afe0982ff9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:45:46 -0400 Subject: NFSv4.2: LAYOUTSTATS is optional to implement Make it so, by checking the return value for NFS4ERR_MOTSUPP and caching the information as a server capability. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif -- cgit v1.2.3 From cf2fde7b39e9446e2af015215d7fb695781af0c1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Jun 2015 06:44:38 +0930 Subject: param: fix module param locks when !CONFIG_SYSFS. As Dan Streetman points out, the entire point of locking for is to stop sysfs accesses, so they're elided entirely in the !SYSFS case. Reported-by: Stephen Rothwell Signed-off-by: Rusty Russell --- include/linux/module.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 6ba0e87fa804..46efa1c9de60 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -240,7 +240,9 @@ struct module { unsigned int num_syms; /* Kernel parameters. */ +#ifdef CONFIG_SYSFS struct mutex param_lock; +#endif struct kernel_param *kp; unsigned int num_kp; -- cgit v1.2.3 From ef90174f821041313d42d99c1c8b35a3af64a910 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Theou Date: Tue, 9 Jun 2015 09:55:02 -0700 Subject: watchdog: watchdog_core: Add watchdog registration deferral mechanism Currently, watchdog subsystem require the misc subsystem to register a watchdog. This may not be the case in case of an early registration of a watchdog, which can be required when the watchdog cannot be disabled. This patch introduces a deferral mechanism to remove this requirement. Signed-off-by: Jean-Baptiste Theou Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index a746bf5216f8..f47feada5b42 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -65,6 +65,8 @@ struct watchdog_ops { * @driver-data:Pointer to the drivers private data. * @lock: Lock for watchdog core internal use only. * @status: Field that contains the devices internal status bits. + * @deferred: entry in wtd_deferred_reg_list which is used to + * register early initialized watchdogs. * * The watchdog_device structure contains all information about a * watchdog timer device. @@ -95,6 +97,7 @@ struct watchdog_device { #define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */ #define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? */ #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ + struct list_head deferred; }; #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) -- cgit v1.2.3 From a9730fca9946f3697410479e0ef1bd759ba00a77 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 29 Jun 2015 09:28:08 -0500 Subject: Fix kmalloc slab creation sequence This patch restores the slab creation sequence that was broken by commit 4066c33d0308f8 and also reverts the portions that introduced the KMALLOC_LOOP_XXX macros. Those can never really work since the slab creation is much more complex than just going from a minimum to a maximum number. The latest upstream kernel boots cleanly on my machine with a 64 bit x86 configuration under KVM using either SLAB or SLUB. Fixes: 4066c33d0308f8 ("support the slub_debug boot option") Reported-by: Theodore Ts'o Signed-off-by: Christoph Lameter Signed-off-by: Linus Torvalds --- include/linux/slab.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9de2fdc8b5e4..a99f0e5243e1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -153,30 +153,8 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) -/* - * The KMALLOC_LOOP_LOW is the definition for the for loop index start number - * to create the kmalloc_caches object in create_kmalloc_caches(). The first - * and the second are 96 and 192. You can see that in the kmalloc_index(), if - * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64, - * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't - * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW. - */ -#if KMALLOC_MIN_SIZE <= 32 -#define KMALLOC_LOOP_LOW 1 -#elif KMALLOC_MIN_SIZE <= 64 -#define KMALLOC_LOOP_LOW 2 -#else -#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW -#endif - #else #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -/* - * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used. - * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be - * initialized. - */ -#define KMALLOC_LOOP_LOW 1 #endif /* -- cgit v1.2.3 From 31f02455455d405320e2f749696bef4e02903b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 12:07:17 -0400 Subject: sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..d8fbd500330e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -- cgit v1.2.3 From 8e7a7f8619f1f93736d9bb7e31caf4721bdc739d Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 30 Jun 2015 14:56:41 -0700 Subject: memblock: introduce a for_each_reserved_mem_region iterator Struct page initialisation had been identified as one of the reasons why large machines take a long time to boot. Patches were posted a long time ago to defer initialisation until they were first used. This was rejected on the grounds it should not be necessary to hurt the fast paths. This series reuses much of the work from that time but defers the initialisation of memory to kswapd so that one thread per node initialises memory local to that node. After applying the series and setting the appropriate Kconfig variable I see this in the boot log on a 64G machine [ 7.383764] kswapd 0 initialised deferred memory in 188ms [ 7.404253] kswapd 1 initialised deferred memory in 208ms [ 7.411044] kswapd 3 initialised deferred memory in 216ms [ 7.411551] kswapd 2 initialised deferred memory in 216ms On a 1TB machine, I see [ 8.406511] kswapd 3 initialised deferred memory in 1116ms [ 8.428518] kswapd 1 initialised deferred memory in 1140ms [ 8.435977] kswapd 0 initialised deferred memory in 1148ms [ 8.437416] kswapd 2 initialised deferred memory in 1148ms Once booted the machine appears to work as normal. Boot times were measured from the time shutdown was called until ssh was available again. In the 64G case, the boot time savings are negligible. On the 1TB machine, the savings were 16 seconds. Nate Zimmer said: : On an older 8 TB box with lots and lots of cpus the boot time, as : measure from grub to login prompt, the boot time improved from 1484 : seconds to exactly 1000 seconds. Waiman Long said: : I ran a bootup timing test on a 12-TB 16-socket IvyBridge-EX system. From : grub menu to ssh login, the bootup time was 453s before the patch and 265s : after the patch - a saving of 188s (42%). Daniel Blueman said: : On a 7TB, 1728-core NumaConnect system with 108 NUMA nodes, we're seeing : stock 4.0 boot in 7136s. This drops to 2159s, or a 70% reduction with : this patchset. Non-temporal PMD init (https://lkml.org/lkml/2015/4/23/350) : drops this to 1045s. This patch (of 13): As part of initializing struct page's in 2MiB chunks, we noticed that at the end of free_all_bootmem(), there was nothing which had forced the reserved/allocated 4KiB pages to be initialized. This helper function will be used for that expansion. Signed-off-by: Robin Holt Signed-off-by: Nate Zimmer Signed-off-by: Mel Gorman Tested-by: Nate Zimmer Tested-by: Waiman Long Tested-by: Daniel J Blueman Acked-by: Pekka Enberg Cc: Robin Holt Cc: Dave Hansen Cc: Waiman Long Cc: Scott Norton Cc: "Luck, Tony" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0215ffd63069..cc4b01972060 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -101,6 +101,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); +void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, + phys_addr_t *out_end); + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. @@ -142,6 +145,21 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) +/** + * for_each_reserved_mem_region - iterate over all reserved memblock areas + * @i: u64 used as loop variable + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + * + * Walks over reserved areas of memblock. Available as soon as memblock + * is initialized. + */ +#define for_each_reserved_mem_region(i, p_start, p_end) \ + for (i = 0UL, \ + __next_reserved_mem_region(&i, p_start, p_end); \ + i != (u64)ULLONG_MAX; \ + __next_reserved_mem_region(&i, p_start, p_end)) + #ifdef CONFIG_MOVABLE_NODE static inline bool memblock_is_hotpluggable(struct memblock_region *m) { -- cgit v1.2.3 From 92923ca3aacef63c92dc297a75ad0c6dfe4eab37 Mon Sep 17 00:00:00 2001 From: Nathan Zimmer Date: Tue, 30 Jun 2015 14:56:48 -0700 Subject: mm: meminit: only set page reserved in the memblock region Currently each page struct is set as reserved upon initialization. This patch leaves the reserved bit clear and only sets the reserved bit when it is known the memory was allocated by the bootmem allocator. This makes it easier to distinguish between uninitialised struct pages and reserved struct pages in later patches. Signed-off-by: Robin Holt Signed-off-by: Nathan Zimmer Signed-off-by: Mel Gorman Tested-by: Nate Zimmer Tested-by: Waiman Long Tested-by: Daniel J Blueman Acked-by: Pekka Enberg Cc: Robin Holt Cc: Dave Hansen Cc: Waiman Long Cc: Scott Norton Cc: "Luck, Tony" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 99959a34f4f1..d662af2d0d01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1635,6 +1635,8 @@ extern void free_highmem_page(struct page *page); extern void adjust_managed_page_count(struct page *page, long count); extern void mem_init_print_info(const char *str); +extern void reserve_bootmem_region(unsigned long start, unsigned long end); + /* Free the reserved page into the buddy system, so it gets managed. */ static inline void __free_reserved_page(struct page *page) { -- cgit v1.2.3 From 8a942fdea560d4ac0e9d9fabcd5201ad20e0c382 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 30 Jun 2015 14:56:55 -0700 Subject: mm: meminit: make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid __early_pfn_to_nid() use static variables to cache recent lookups as memblock lookups are very expensive but it assumes that memory initialisation is single-threaded. Parallel initialisation of struct pages will break that assumption so this patch makes __early_pfn_to_nid() SMP-safe by requiring the caller to cache recent search information. early_pfn_to_nid() keeps the same interface but is only safe to use early in boot due to the use of a global static variable. meminit_pfn_in_nid() is an SMP-safe version that callers must maintain their own state for. Signed-off-by: Mel Gorman Tested-by: Nate Zimmer Tested-by: Waiman Long Tested-by: Daniel J Blueman Acked-by: Pekka Enberg Cc: Robin Holt Cc: Nate Zimmer Cc: Dave Hansen Cc: Waiman Long Cc: Scott Norton Cc: "Luck, Tony" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++-- include/linux/mmzone.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d662af2d0d01..2e872f92dbac 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1726,7 +1726,8 @@ extern void sparse_memory_present_with_active_regions(int nid); #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) -static inline int __early_pfn_to_nid(unsigned long pfn) +static inline int __early_pfn_to_nid(unsigned long pfn, + struct mminit_pfnnid_cache *state) { return 0; } @@ -1734,7 +1735,8 @@ static inline int __early_pfn_to_nid(unsigned long pfn) /* please see mm/page_alloc.c */ extern int __meminit early_pfn_to_nid(unsigned long pfn); /* there is a per-arch backend function. */ -extern int __meminit __early_pfn_to_nid(unsigned long pfn); +extern int __meminit __early_pfn_to_nid(unsigned long pfn, + struct mminit_pfnnid_cache *state); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 54d74f6eb233..b2473d822549 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1216,10 +1216,24 @@ void sparse_init(void); #define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +/* + * During memory init memblocks map pfns to nids. The search is expensive and + * this caches recent lookups. The implementation of __early_pfn_to_nid + * may treat start/end as pfns or sections. + */ +struct mminit_pfnnid_cache { + unsigned long last_start; + unsigned long last_end; + int last_nid; +}; + #ifdef CONFIG_NODES_SPAN_OTHER_NODES bool early_pfn_in_nid(unsigned long pfn, int nid); +bool meminit_pfn_in_nid(unsigned long pfn, int node, + struct mminit_pfnnid_cache *state); #else -#define early_pfn_in_nid(pfn, nid) (1) +#define early_pfn_in_nid(pfn, nid) (1) +#define meminit_pfn_in_nid(pfn, nid, state) (1) #endif #ifndef early_pfn_valid -- cgit v1.2.3 From 75a592a47129dcfc1aec40e7d3cdf239a767d441 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 30 Jun 2015 14:56:59 -0700 Subject: mm: meminit: inline some helper functions early_pfn_in_nid() and meminit_pfn_in_nid() are small functions that are unnecessarily visible outside memory initialisation. As well as unnecessary visibility, it's unnecessary function call overhead when initialising pages. This patch moves the helpers inline. [akpm@linux-foundation.org: fix build] [mhocko@suse.cz: fix build] Signed-off-by: Mel Gorman Tested-by: Nate Zimmer Tested-by: Waiman Long Tested-by: Daniel J Blueman Acked-by: Pekka Enberg Cc: Robin Holt Cc: Nate Zimmer Cc: Dave Hansen Cc: Waiman Long Cc: Scott Norton Cc: "Luck, Tony" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b2473d822549..1e05dc7449cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1227,15 +1227,6 @@ struct mminit_pfnnid_cache { int last_nid; }; -#ifdef CONFIG_NODES_SPAN_OTHER_NODES -bool early_pfn_in_nid(unsigned long pfn, int nid); -bool meminit_pfn_in_nid(unsigned long pfn, int node, - struct mminit_pfnnid_cache *state); -#else -#define early_pfn_in_nid(pfn, nid) (1) -#define meminit_pfn_in_nid(pfn, nid, state) (1) -#endif - #ifndef early_pfn_valid #define early_pfn_valid(pfn) (1) #endif -- cgit v1.2.3 From 3a80a7fa7989fbb6aa56bb6ad31811b62cf99e60 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 30 Jun 2015 14:57:02 -0700 Subject: mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set This patch initalises all low memory struct pages and 2G of the highest zone on each node during memory initialisation if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set. That config option cannot be set but will be available in a later patch. Parallel initialisation of struct page depends on some features from memory hotplug and it is necessary to alter alter section annotations. Signed-off-by: Mel Gorman Tested-by: Nate Zimmer Tested-by: Waiman Long Tested-by: Daniel J Blueman Acked-by: Pekka Enberg Cc: Robin Holt Cc: Nate Zimmer Cc: Dave Hansen Cc: Waiman Long Cc: Scott Norton Cc: "Luck, Tony" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e05dc7449cd..754c25966a0a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -762,6 +762,14 @@ typedef struct pglist_data { /* Number of pages migrated during the rate limiting time interval */ unsigned long numabalancing_migrate_nr_pages; #endif + +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT + /* + * If memory initialisation on large machines is deferred then this + * is the first PFN that needs to be initialised. + */ + unsigned long first_deferred_pfn; +#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) -- cgit v1.2.3 From 0e1cc95b4cc7293bb7b39175035e7f7e45c90977 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 30 Jun 2015 14:57:27 -0700 Subject: mm: meminit: finish initialisation of struct pages before basic setup Waiman Long reported that 24TB machines hit OOM during basic setup when struct page initialisation was deferred. One approach is to initialise memory on demand but it interferes with page allocator paths. This patch creates dedicated threads to initialise memory before basic setup. It then blocks on a rw_semaphore until completion as a wait_queue and counter is overkill. This may be slower to boot but it's simplier overall and also gets rid of a section mangling which existed so kswapd could do the initialisation. [akpm@linux-foundation.org: include rwsem.h, use DECLARE_RWSEM, fix comment, remove unneeded cast] Signed-off-by: Mel Gorman Cc: Waiman Long Cc: Dave Hansen Cc: Scott Norton Tested-by: Daniel J Blueman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6ba7cf23748f..ad35f300b9a4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -384,6 +384,14 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT +void page_alloc_init_late(void); +#else +static inline void page_alloc_init_late(void) +{ +} +#endif + /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what * GFP flags are used before interrupts are enabled. Once interrupts are -- cgit v1.2.3 From 5375b708f2547f70cd2bee2fd8663ab7035f9551 Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Tue, 30 Jun 2015 14:57:46 -0700 Subject: kernel/panic/kexec: fix "crash_kexec_post_notifiers" option issue in oops path Commit f06e5153f4ae2e ("kernel/panic.c: add "crash_kexec_post_notifiers" option for kdump after panic_notifers") introduced "crash_kexec_post_notifiers" kernel boot option, which toggles wheather panic() calls crash_kexec() before panic_notifiers and dump kmsg or after. The problem is that the commit overlooks panic_on_oops kernel boot option. If it is enabled, crash_kexec() is called directly without going through panic() in oops path. To fix this issue, this patch adds a check to "crash_kexec_post_notifiers" in the condition of kexec_should_crash(). Also, put a comment in kexec_should_crash() to explain not obvious things on this patch. Signed-off-by: HATAYAMA Daisuke Acked-by: Baoquan He Tested-by: Hidehiro Kawai Reviewed-by: Masami Hiramatsu Cc: Vivek Goyal Cc: Ingo Molnar Cc: Hidehiro Kawai Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5acf5b70866d..0dfa4e31563d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -439,6 +439,9 @@ extern int panic_on_unrecovered_nmi; extern int panic_on_io_nmi; extern int panic_on_warn; extern int sysctl_panic_on_stackoverflow; + +extern bool crash_kexec_post_notifiers; + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. -- cgit v1.2.3 From 2a1bf8f93b33992bb0457512b28d046e279bbd7e Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Tue, 30 Jun 2015 14:58:54 -0700 Subject: lib/scatterlist: mark input buffer parameters as 'const' The 'buf' parameter of sg(p)copy_from_buffer() can and should be const-qualified, although because of the shared implementation of _to_buffer() and _from_buffer(), we have to cast this away internally. This means that callers who have a 'const' buffer containing the data to be copied to the sg-list no longer have to cast away the const-ness themselves. It also enables improved coverage by code analysis tools. Signed-off-by: Dave Gordon Cc: Akinobu Mita Cc: "Martin K. Petersen" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/scatterlist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 50a8486c524b..505d0481df1e 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -266,12 +266,12 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, gfp_t gfp_mask); size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, - void *buf, size_t buflen); + const void *buf, size_t buflen); size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen); size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents, - void *buf, size_t buflen, off_t skip); + const void *buf, size_t buflen, off_t skip); size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip); -- cgit v1.2.3 From 386ecb1216f9e38947ce6a2af22e5e1e47256a97 Mon Sep 17 00:00:00 2001 From: Dave Gordon Date: Tue, 30 Jun 2015 14:58:57 -0700 Subject: drivers/scsi/scsi_debug.c: resolve sg buffer const-ness issue do_device_access() takes a separate parameter to indicate the direction of data transfer, which it used to use to select the appropriate function out of sg_pcopy_{to,from}_buffer(). However these two functions now have So this patch makes it bypass these wrappers and call the underlying function sg_copy_buffer() directly; this has the same calling style as do_device_access() i.e. a separate direction-of-transfer parameter and no pointers-to-const, so skipping the wrappers not only eliminates the warning, it also make the code simpler :) [akpm@linux-foundation.org: fix very broken build] Signed-off-by: Dave Gordon Acked-by: Arnd Bergmann Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/scatterlist.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 505d0481df1e..9b1ef0c820a7 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -265,6 +265,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, unsigned long offset, unsigned long size, gfp_t gfp_mask); +size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, + size_t buflen, off_t skip, bool to_buffer); + size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, const void *buf, size_t buflen); size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, -- cgit v1.2.3 From 0030edf296db8a7afb13573eb12977b7d399cd40 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 30 Jun 2015 15:00:03 -0700 Subject: genalloc: rename dev_get_gen_pool() to gen_pool_get() To be consistent with other genalloc interface namings, rename dev_get_gen_pool() to gen_pool_get(). The original omitted "dev_" prefix is removed, since it points to argument type of the function, and so it does not bring any useful information. [akpm@linux-foundation.org: update arch/arm/mach-socfpga/pm.c] Signed-off-by: Vladimir Zapolskiy Acked-by: Nicolas Ferre Cc: Philipp Zabel Cc: Shawn Guo Cc: Sascha Hauer Cc: Alexandre Belloni Cc: Russell King Cc: Mauro Carvalho Chehab Cc: Vinod Koul Cc: Takashi Iwai Cc: Jaroslav Kysela Cc: Mark Brown Cc: Nicolas Ferre Cc: Alan Tull Cc: Dinh Nguyen Cc: Kevin Hilman Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 1ccaab44abcc..015d17068615 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -119,7 +119,7 @@ extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, extern struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, int nid); -extern struct gen_pool *dev_get_gen_pool(struct device *dev); +extern struct gen_pool *gen_pool_get(struct device *dev); bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, size_t size); -- cgit v1.2.3 From abdd4a7025282fbe3737e1bcb5f51afc8d8ea1b8 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Tue, 30 Jun 2015 15:00:07 -0700 Subject: genalloc: rename of_get_named_gen_pool() to of_gen_pool_get() To be consistent with other kernel interface namings, rename of_get_named_gen_pool() to of_gen_pool_get(). In the original function name "_named" suffix references to a device tree property, which contains a phandle to a device and the corresponding device driver is assumed to register a gen_pool object. Due to a weak relation and to avoid any confusion (e.g. in future possible scenario if gen_pool objects are named) the suffix is removed. [sfr@canb.auug.org.au: crypto/marvell/cesa - fix up for of_get_named_gen_pool() rename] Signed-off-by: Vladimir Zapolskiy Cc: Nicolas Ferre Cc: Philipp Zabel Cc: Shawn Guo Cc: Sascha Hauer Cc: Alexandre Belloni Cc: Russell King Cc: Mauro Carvalho Chehab Cc: Vinod Koul Cc: Takashi Iwai Cc: Jaroslav Kysela Signed-off-by: Stephen Rothwell Cc: Herbert Xu Cc: Boris BREZILLON Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 015d17068615..5383bb1394a1 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -125,10 +125,10 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, size_t size); #ifdef CONFIG_OF -extern struct gen_pool *of_get_named_gen_pool(struct device_node *np, +extern struct gen_pool *of_gen_pool_get(struct device_node *np, const char *propname, int index); #else -static inline struct gen_pool *of_get_named_gen_pool(struct device_node *np, +static inline struct gen_pool *of_gen_pool_get(struct device_node *np, const char *propname, int index) { return NULL; -- cgit v1.2.3 From 8a81252b774b53e628a8a0fe18e2b8fc236d92cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2015 15:54:08 +0200 Subject: fs/file.c: don't acquire files->file_lock in fd_install() Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet Reported-by: Mateusz Guzik Acked-by: Mateusz Guzik Tested-by: Mateusz Guzik Signed-off-by: Al Viro --- include/linux/fdtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct { * read mostly part */ atomic_t count; + bool resize_in_progress; + wait_queue_head_t resize_wait; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* -- cgit v1.2.3 From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 15:54:49 -0500 Subject: fs: Add helper functions for permanently empty directories. To ensure it is safe to mount proc and sysfs I need to check if filesystems that are mounted on top of them are mounted on truly empty directories. Given that some directories can gain entries over time, knowing that a directory is empty right now is insufficient. Therefore add supporting infrastructure for permantently empty directories that proc and sysfs can use when they create mount points for filesystems and fs_fully_visible can use to test for permanently empty directories to ensure that nothing will be gained by mounting a fresh copy of proc or sysfs. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2d24eeb8e59c..571aab91bfc0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); -- cgit v1.2.3 From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 22:09:14 -0500 Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints. Add a magic sysctl table sysctl_mount_point that when used to create a directory forces that directory to be permanently empty. Update the code to use make_empty_dir_inode when accessing permanently empty directories. Update the code to not allow adding to permanently empty directories. Update /proc/sys/fs/binfmt_misc to be a permanently empty directory. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea5697..fa7bc29925c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { -- cgit v1.2.3 From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:09:29 -0500 Subject: kernfs: Add support for always empty directories. Add a new function kernfs_create_empty_dir that can be used to create directory that can not be modified. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/kernfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..29d1896c3ba5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, -- cgit v1.2.3 From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:31:40 -0500 Subject: sysfs: Add support for permanently empty directories to serve as mount points. Add two functions sysfs_create_mount_point and sysfs_remove_mount_point that hang a permanently empty directory off of a kobject or remove a permanently emptpy directory hanging from a kobject. Export these new functions so modular filesystems can use them. Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" --- include/linux/sysfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17e..9f65758311a4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) -- cgit v1.2.3 From bd7ade3cd9b0850264306f5c2b79024a417b6396 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: bufferhead: Add _gfp version for sb_getblk() sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { -- cgit v1.2.3 From ec110bc7cc48d7806c9b65094e6afb19452d458f Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 7 May 2015 06:45:21 -0400 Subject: NTB: Move files in preparation for NTB abstraction This patch only moves files to their new locations, before applying the next two patches adding the NTB Abstraction layer. Splitting this patch from the next is intended make distinct which code is changed only due to moving the files, versus which are substantial code changes in adding the NTB Abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 88 ------------------------------------------- include/linux/ntb_transport.h | 88 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 88 deletions(-) delete mode 100644 include/linux/ntb.h create mode 100644 include/linux/ntb_transport.h (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h deleted file mode 100644 index 9ac1a62fc6f5..000000000000 --- a/include/linux/ntb.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason - */ - -struct ntb_transport_qp; - -struct ntb_client { - struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); -}; - -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -int ntb_register_client(struct ntb_client *drvr); -void ntb_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); - -struct ntb_queue_handlers { - void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler)(void *data, int status); -}; - -unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); -unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); -struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, - const struct ntb_queue_handlers *handlers); -void ntb_transport_free_queue(struct ntb_transport_qp *qp); -int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); -void ntb_transport_link_up(struct ntb_transport_qp *qp); -void ntb_transport_link_down(struct ntb_transport_qp *qp); -bool ntb_transport_link_query(struct ntb_transport_qp *qp); diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h new file mode 100644 index 000000000000..f78b64cc09d5 --- /dev/null +++ b/include/linux/ntb_transport.h @@ -0,0 +1,88 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * BSD LICENSE + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel PCIe NTB Linux driver + * + * Contact Information: + * Jon Mason + */ + +struct ntb_transport_qp; + +struct ntb_client { + struct device_driver driver; + int (*probe)(struct pci_dev *pdev); + void (*remove)(struct pci_dev *pdev); +}; + +enum { + NTB_LINK_DOWN = 0, + NTB_LINK_UP, +}; + +int ntb_transport_register_client(struct ntb_client *drvr); +void ntb_transport_unregister_client(struct ntb_client *drvr); +int ntb_register_client_dev(char *device_name); +void ntb_unregister_client_dev(char *device_name); + +struct ntb_queue_handlers { + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, int status); +}; + +unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); +unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); +struct ntb_transport_qp * +ntb_transport_create_queue(void *data, struct pci_dev *pdev, + const struct ntb_queue_handlers *handlers); +void ntb_transport_free_queue(struct ntb_transport_qp *qp); +int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); +void ntb_transport_link_up(struct ntb_transport_qp *qp); +void ntb_transport_link_down(struct ntb_transport_qp *qp); +bool ntb_transport_link_query(struct ntb_transport_qp *qp); -- cgit v1.2.3 From a13f35e8714009145e32ebe2bf25b84e1376e314 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jul 2015 08:44:34 -0600 Subject: writeback: don't embed root bdi_writeback_congested in bdi_writeback 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi. When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete. To fix the bug, bdi destruction will be updated to not wait for cong's to drain, which naturally means that cong's may outlive the associated bdi. This is fine for non-root cong's but is problematic for the root cong's which are embedded in their bdi's as they may end up getting dereferenced after the containing bdi's are freed. This patch makes root cong's behave the same as non-root cong's. They are no longer embedded in their bdi's but allocated separately during bdi initialization, indexed and reference counted the same way. * As cong handling is the same for all wb's, wb->congested initialization is moved into wb_init(). * When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting. bdi->wb_congested is now a pointer pointing to the root cong allocated during bdi init and minimal refcnting operations are implemented. * The above makes root wb init paths diverge depending on CONFIG_CGROUP_WRITEBACK. root wb init is moved to cgwb_bdi_init(). This patch in itself shouldn't cause any consequential behavior differences but prepares for the actual fix. Signed-off-by: Tejun Heo Reported-by: Jon Christopherson Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681 Tested-by: Jon Christopherson Added include to backing-dev.h for kfree() definition. Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 5 +++-- include/linux/backing-dev.h | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a48d90e3bcbb..a23209b43842 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -50,10 +50,10 @@ enum wb_stat_item { */ struct bdi_writeback_congested { unsigned long state; /* WB_[a]sync_congested flags */ + atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK struct backing_dev_info *bdi; /* the associated bdi */ - atomic_t refcnt; /* nr of attached wb's and blkg */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif @@ -150,11 +150,12 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ - struct bdi_writeback_congested wb_congested; /* its congested state */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ +#else + struct bdi_writeback_congested *wb_congested; #endif wait_queue_head_t wb_waitq; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0e6d4828a77a..0fe9df983ab7 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -15,6 +15,7 @@ #include #include #include +#include int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode) static inline struct bdi_writeback_congested * wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) { - return bdi->wb.congested; + atomic_inc(&bdi->wb_congested->refcnt); + return bdi->wb_congested; } static inline void wb_congested_put(struct bdi_writeback_congested *congested) { + if (atomic_dec_and_test(&congested->refcnt)) + kfree(congested); } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) -- cgit v1.2.3 From 91e20b5040c67c51aad88cf87db4305c5bd7f79d Mon Sep 17 00:00:00 2001 From: Joel Porquet Date: Thu, 2 Jul 2015 15:32:00 -0400 Subject: irqchip: Move IRQCHIP_DECLARE macro to include/linux/irqchip.h At the moment the IRQCHIP_DECLARE macro is only declared locally in drivers/irqchip/irqchip.h. It prevents from using it directly in arch/* directories whenever irqchip drivers only exist there, which happens in a few cases (e.g. arc, arm, microblaze and mips). This patch makes the macro to be globally defined, i.e. in include/linux/irqchip.h, and thus usable for arch-specific declarations of irqchip drivers. In this way, it is very similar to what clocksource does (ie CLOCKSOURCE_OF_DECLARE is defined in include/linux/clocksource.h). For now, this patch only moves the declaration of the macro IRQCHIP_DECLARE to the global header 'include/linux/irqchip.h' and make 'drivers/irqchip/irqchip.h' include 'include/linux/irqchip.h'. Later, other patches will get rid of 'drivers/irqchip/irqchip.h' and modify all the impacted irqchip drivers. Signed-off-by: Joel Porquet Cc: Jason Cooper Link: http://lkml.kernel.org/r/1435865565-14114-1-git-send-email-joel@porquet.org Signed-off-by: Thomas Gleixner --- include/linux/irqchip.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 14d79131f53d..638887376e58 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,20 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include + +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their DT compatible string and their + * initialization function. + * + * @name: name that must be unique accross all IRQCHIP_DECLARE of the + * same file. + * @compstr: compatible string of the irqchip driver + * @fn: initialization function + */ +#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else -- cgit v1.2.3 From 2ecd9d29abb171d6e97a4f3eb29d7456a11401b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Jul 2015 18:53:58 +0200 Subject: sched, preempt_notifier: separate notifier registration from static_key inc/dec Commit 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") had two problems. First, the preempt-notifier API needs to sleep with the addition of the static_key, we do however need to hold off preemption while modifying the preempt notifier list, otherwise a preemption could observe an inconsistent list state. KVM correctly registers and unregisters preempt notifiers with preemption disabled, so the sleep caused dmesg splats. Second, KVM registers and unregisters preemption notifiers very often (in vcpu_load/vcpu_put). With a single uniprocessor guest the static key would move between 0 and 1 continuously, hitting the slow path on every userspace exit. To fix this, wrap the static_key inc/dec in a new API, and call it from KVM. Fixes: 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") Reported-by: Pontus Fuchs Reported-by: Takashi Iwai Tested-by: Takashi Iwai Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- include/linux/preempt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 0f1534acaf60..84991f185173 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -293,6 +293,8 @@ struct preempt_notifier { struct preempt_ops *ops; }; +void preempt_notifier_inc(void); +void preempt_notifier_dec(void); void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); -- cgit v1.2.3 From f6db8347993256b58bd4746b0c4c5b935c32210d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 25 Jun 2015 23:53:37 +0530 Subject: sched/stat: Simplify the sched_info accounting dependency Both CONFIG_SCHEDSTATS=y and CONFIG_TASK_DELAY_ACCT=y track task sched_info, which results in ugly #if clauses. Simplify the code by introducing a synthethic CONFIG_SCHED_INFO switch, selected by both. Signed-off-by: Naveen N. Rao Cc: Balbir Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: ricklind@us.ibm.com Link: http://lkml.kernel.org/r/8d19eef800811a94b0f91bcbeb27430a884d7433.1435255405.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83e608a..9bf4bc0e3b8b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -849,7 +849,7 @@ extern struct user_struct root_user; struct backing_dev_info; struct reclaim_state; -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ @@ -859,7 +859,7 @@ struct sched_info { unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ }; -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ +#endif /* CONFIG_SCHED_INFO */ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { @@ -1408,7 +1408,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; #endif -- cgit v1.2.3 From 6b55c9654fccf69ae7ace23ca101dc37b903181b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 25 Jun 2015 22:51:41 +0530 Subject: sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h Currently print_cfs_rq() is declared in include/linux/sched.h. However it's not used outside kernel/sched. Hence move the declaration to kernel/sched/sched.h Also some functions are only available for CONFIG_SCHED_DEBUG=y. Hence move the declarations to within the #ifdef. Signed-off-by: Srikar Dronamraju Acked-by: Rik van Riel Cc: Iulia Manda Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435252903-1081-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9bf4bc0e3b8b..3505352dd296 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -191,8 +191,6 @@ struct task_group; #ifdef CONFIG_SCHED_DEBUG extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #endif /* -- cgit v1.2.3 From a1bd3baeb2f18b2b3d0f98ce5fdaa725149b950b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: NTB: Add NTB hardware abstraction layer Abstract the NTB device behind a programming interface, so that it can support different hardware and client drivers. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 984 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 984 insertions(+) create mode 100644 include/linux/ntb.h (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h new file mode 100644 index 000000000000..b02f72bb8e32 --- /dev/null +++ b/include/linux/ntb.h @@ -0,0 +1,984 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +#ifndef _NTB_H_ +#define _NTB_H_ + +#include +#include + +struct ntb_client; +struct ntb_dev; +struct pci_dev; + +/** + * enum ntb_topo - NTB connection topology + * @NTB_TOPO_NONE: Topology is unknown or invalid. + * @NTB_TOPO_PRI: On primary side of local ntb. + * @NTB_TOPO_SEC: On secondary side of remote ntb. + * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. + * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. + */ +enum ntb_topo { + NTB_TOPO_NONE = -1, + NTB_TOPO_PRI, + NTB_TOPO_SEC, + NTB_TOPO_B2B_USD, + NTB_TOPO_B2B_DSD, +}; + +static inline int ntb_topo_is_b2b(enum ntb_topo topo) +{ + switch ((int)topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + return 1; + } + return 0; +} + +static inline char *ntb_topo_string(enum ntb_topo topo) +{ + switch (topo) { + case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; + case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; + case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; + case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; + case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + } + return "NTB_TOPO_INVALID"; +} + +/** + * enum ntb_speed - NTB link training speed + * @NTB_SPEED_AUTO: Request the max supported speed. + * @NTB_SPEED_NONE: Link is not trained to any speed. + * @NTB_SPEED_GEN1: Link is trained to gen1 speed. + * @NTB_SPEED_GEN2: Link is trained to gen2 speed. + * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + */ +enum ntb_speed { + NTB_SPEED_AUTO = -1, + NTB_SPEED_NONE = 0, + NTB_SPEED_GEN1 = 1, + NTB_SPEED_GEN2 = 2, + NTB_SPEED_GEN3 = 3, +}; + +/** + * enum ntb_width - NTB link training width + * @NTB_WIDTH_AUTO: Request the max supported width. + * @NTB_WIDTH_NONE: Link is not trained to any width. + * @NTB_WIDTH_1: Link is trained to 1 lane width. + * @NTB_WIDTH_2: Link is trained to 2 lane width. + * @NTB_WIDTH_4: Link is trained to 4 lane width. + * @NTB_WIDTH_8: Link is trained to 8 lane width. + * @NTB_WIDTH_12: Link is trained to 12 lane width. + * @NTB_WIDTH_16: Link is trained to 16 lane width. + * @NTB_WIDTH_32: Link is trained to 32 lane width. + */ +enum ntb_width { + NTB_WIDTH_AUTO = -1, + NTB_WIDTH_NONE = 0, + NTB_WIDTH_1 = 1, + NTB_WIDTH_2 = 2, + NTB_WIDTH_4 = 4, + NTB_WIDTH_8 = 8, + NTB_WIDTH_12 = 12, + NTB_WIDTH_16 = 16, + NTB_WIDTH_32 = 32, +}; + +/** + * struct ntb_client_ops - ntb client operations + * @probe: Notify client of a new device. + * @remove: Notify client to remove a device. + */ +struct ntb_client_ops { + int (*probe)(struct ntb_client *client, struct ntb_dev *ntb); + void (*remove)(struct ntb_client *client, struct ntb_dev *ntb); +}; + +static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->probe && + ops->remove && + 1; +} + +/** + * struct ntb_ctx_ops - ntb driver context operations + * @link_event: See ntb_link_event(). + * @db_event: See ntb_db_event(). + */ +struct ntb_ctx_ops { + void (*link_event)(void *ctx); + void (*db_event)(void *ctx, int db_vector); +}; + +static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) +{ + /* commented callbacks are not required: */ + return + /* ops->link_event && */ + /* ops->db_event && */ + 1; +} + +/** + * struct ntb_ctx_ops - ntb device operations + * @mw_count: See ntb_mw_count(). + * @mw_get_range: See ntb_mw_get_range(). + * @mw_set_trans: See ntb_mw_set_trans(). + * @mw_clear_trans: See ntb_mw_clear_trans(). + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). + * @db_is_unsafe: See ntb_db_is_unsafe(). + * @db_valid_mask: See ntb_db_valid_mask(). + * @db_vector_count: See ntb_db_vector_count(). + * @db_vector_mask: See ntb_db_vector_mask(). + * @db_read: See ntb_db_read(). + * @db_set: See ntb_db_set(). + * @db_clear: See ntb_db_clear(). + * @db_read_mask: See ntb_db_read_mask(). + * @db_set_mask: See ntb_db_set_mask(). + * @db_clear_mask: See ntb_db_clear_mask(). + * @peer_db_addr: See ntb_peer_db_addr(). + * @peer_db_read: See ntb_peer_db_read(). + * @peer_db_set: See ntb_peer_db_set(). + * @peer_db_clear: See ntb_peer_db_clear(). + * @peer_db_read_mask: See ntb_peer_db_read_mask(). + * @peer_db_set_mask: See ntb_peer_db_set_mask(). + * @peer_db_clear_mask: See ntb_peer_db_clear_mask(). + * @spad_is_unsafe: See ntb_spad_is_unsafe(). + * @spad_count: See ntb_spad_count(). + * @spad_read: See ntb_spad_read(). + * @spad_write: See ntb_spad_write(). + * @peer_spad_addr: See ntb_peer_spad_addr(). + * @peer_spad_read: See ntb_peer_spad_read(). + * @peer_spad_write: See ntb_peer_spad_write(). + */ +struct ntb_dev_ops { + int (*mw_count)(struct ntb_dev *ntb); + int (*mw_get_range)(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size); + int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size); + int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + + int (*db_is_unsafe)(struct ntb_dev *ntb); + u64 (*db_valid_mask)(struct ntb_dev *ntb); + int (*db_vector_count)(struct ntb_dev *ntb); + u64 (*db_vector_mask)(struct ntb_dev *ntb, int db_vector); + + u64 (*db_read)(struct ntb_dev *ntb); + int (*db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*db_read_mask)(struct ntb_dev *ntb); + int (*db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*peer_db_addr)(struct ntb_dev *ntb, + phys_addr_t *db_addr, resource_size_t *db_size); + u64 (*peer_db_read)(struct ntb_dev *ntb); + int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*peer_db_read_mask)(struct ntb_dev *ntb); + int (*peer_db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*spad_is_unsafe)(struct ntb_dev *ntb); + int (*spad_count)(struct ntb_dev *ntb); + + u32 (*spad_read)(struct ntb_dev *ntb, int idx); + int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + + int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); + int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); +}; + +static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->mw_count && + ops->mw_get_range && + ops->mw_set_trans && + /* ops->mw_clear_trans && */ + ops->link_is_up && + ops->link_enable && + ops->link_disable && + /* ops->db_is_unsafe && */ + ops->db_valid_mask && + + /* both set, or both unset */ + (!ops->db_vector_count == !ops->db_vector_mask) && + + ops->db_read && + /* ops->db_set && */ + ops->db_clear && + /* ops->db_read_mask && */ + ops->db_set_mask && + ops->db_clear_mask && + ops->peer_db_addr && + /* ops->peer_db_read && */ + ops->peer_db_set && + /* ops->peer_db_clear && */ + /* ops->peer_db_read_mask && */ + /* ops->peer_db_set_mask && */ + /* ops->peer_db_clear_mask && */ + /* ops->spad_is_unsafe && */ + ops->spad_count && + ops->spad_read && + ops->spad_write && + ops->peer_spad_addr && + /* ops->peer_spad_read && */ + ops->peer_spad_write && + 1; +} + +/** + * struct ntb_client - client interested in ntb devices + * @drv: Linux driver object. + * @ops: See &ntb_client_ops. + */ +struct ntb_client { + struct device_driver drv; + const struct ntb_client_ops ops; +}; + +#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) + +/** + * struct ntb_device - ntb device + * @dev: Linux device object. + * @pdev: Pci device entry of the ntb. + * @topo: Detected topology of the ntb. + * @ops: See &ntb_dev_ops. + * @ctx: See &ntb_ctx_ops. + * @ctx_ops: See &ntb_ctx_ops. + */ +struct ntb_dev { + struct device dev; + struct pci_dev *pdev; + enum ntb_topo topo; + const struct ntb_dev_ops *ops; + void *ctx; + const struct ntb_ctx_ops *ctx_ops; + + /* private: */ + + /* synchronize setting, clearing, and calling ctx_ops */ + spinlock_t ctx_lock; + /* block unregister until device is fully released */ + struct completion released; +}; + +#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) + +/** + * ntb_register_client() - register a client for interest in ntb devices + * @client: Client context. + * + * The client will be added to the list of clients interested in ntb devices. + * The client will be notified of any ntb devices that are not already + * associated with a client, or if ntb devices are registered later. + * + * Return: Zero if the client is registered, otherwise an error number. + */ +#define ntb_register_client(client) \ + __ntb_register_client((client), THIS_MODULE, KBUILD_MODNAME) + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name); + +/** + * ntb_unregister_client() - unregister a client for interest in ntb devices + * @client: Client context. + * + * The client will be removed from the list of clients interested in ntb + * devices. If any ntb devices are associated with the client, the client will + * be notified to remove those devices. + */ +void ntb_unregister_client(struct ntb_client *client); + +#define module_ntb_client(__ntb_client) \ + module_driver(__ntb_client, ntb_register_client, \ + ntb_unregister_client) + +/** + * ntb_register_device() - register a ntb device + * @ntb: NTB device context. + * + * The device will be added to the list of ntb devices. If any clients are + * interested in ntb devices, each client will be notified of the ntb device, + * until at most one client accepts the device. + * + * Return: Zero if the device is registered, otherwise an error number. + */ +int ntb_register_device(struct ntb_dev *ntb); + +/** + * ntb_register_device() - unregister a ntb device + * @ntb: NTB device context. + * + * The device will be removed from the list of ntb devices. If the ntb device + * is associated with a client, the client will be notified to remove the + * device. + */ +void ntb_unregister_device(struct ntb_dev *ntb); + +/** + * ntb_set_ctx() - associate a driver context with an ntb device + * @ntb: NTB device context. + * @ctx: Driver context. + * @ctx_ops: Driver context operations. + * + * Associate a driver context and operations with a ntb device. The context is + * provided by the client driver, and the driver may associate a different + * context with each ntb device. + * + * Return: Zero if the context is associated, otherwise an error number. + */ +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops); + +/** + * ntb_clear_ctx() - disassociate any driver context from an ntb device + * @ntb: NTB device context. + * + * Clear any association that may exist between a driver context and the ntb + * device. + */ +void ntb_clear_ctx(struct ntb_dev *ntb); + +/** + * ntb_link_event() - notify driver context of a change in link status + * @ntb: NTB device context. + * + * Notify the driver context that the link status may have changed. The driver + * should call ntb_link_is_up() to get the current status. + */ +void ntb_link_event(struct ntb_dev *ntb); + +/** + * ntb_db_event() - notify driver context of a doorbell event + * @ntb: NTB device context. + * @vector: Interrupt vector number. + * + * Notify the driver context of a doorbell event. If hardware supports + * multiple interrupt vectors for doorbells, the vector number indicates which + * vector received the interrupt. The vector number is relative to the first + * vector used for doorbells, starting at zero, and must be less than + ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * doorbell bits need service, and ntb_db_vector_mask() to determine which of + * those bits are associated with the vector number. + */ +void ntb_db_event(struct ntb_dev *ntb, int vector); + +/** + * ntb_mw_count() - get the number of memory windows + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * + * Return: the number of memory windows. + */ +static inline int ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->mw_count(ntb); +} + +/** + * ntb_mw_get_range() - get the range of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @base: OUT - the base address for mapping the memory window + * @size: OUT - the size for mapping the memory window + * @align: OUT - the base alignment for translating the memory window + * @align_size: OUT - the size alignment for translating the memory window + * + * Get the range of a memory window. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. The alignment and + * size may be used for translating the memory window, for the peer to access + * memory on the local system. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size) +{ + return ntb->ops->mw_get_range(ntb, idx, base, size, + align, align_size); +} + +/** + * ntb_mw_set_trans() - set the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @addr: The dma address local memory to expose to the peer. + * @size: The size of the local memory to expose to the peer. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + return ntb->ops->mw_set_trans(ntb, idx, addr, size); +} + +/** + * ntb_mw_clear_trans() - clear the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * + * Clear the translation of a memory window. The peer may no longer access + * local memory through the window. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +{ + if (!ntb->ops->mw_clear_trans) + return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, idx); +} + +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. + * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + +/** + * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using doorbells. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use doorbells, or One if it is not safe. + */ +static inline int ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_is_unsafe) + return 0; + + return ntb->ops->db_is_unsafe(ntb); +} + +/** + * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb + * @ntb: NTB device context. + * + * Hardware may support different number or arrangement of doorbell bits. + * + * Return: A mask of doorbell bits supported by the ntb. + */ +static inline u64 ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb->ops->db_valid_mask(ntb); +} + +/** + * ntb_db_vector_count() - get the number of doorbell interrupt vectors + * @ntb: NTB device context. + * + * Hardware may support different number of interrupt vectors. + * + * Return: The number of doorbell interrupt vectors. + */ +static inline int ntb_db_vector_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_vector_count) + return 1; + + return ntb->ops->db_vector_count(ntb); +} + +/** + * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector + * @ntb: NTB device context. + * @vector: Doorbell vector number. + * + * Each interrupt vector may have a different number or arrangement of bits. + * + * Return: A mask of doorbell bits serviced by a vector. + */ +static inline u64 ntb_db_vector_mask(struct ntb_dev *ntb, int vector) +{ + if (!ntb->ops->db_vector_mask) + return ntb_db_valid_mask(ntb); + + return ntb->ops->db_vector_mask(ntb, vector); +} + +/** + * ntb_db_read() - read the local doorbell register + * @ntb: NTB device context. + * + * Read the local doorbell register, and return the bits that are set. + * + * Return: The bits currently set in the local doorbell register. + */ +static inline u64 ntb_db_read(struct ntb_dev *ntb) +{ + return ntb->ops->db_read(ntb); +} + +/** + * ntb_db_set() - set bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the local doorbell register, which may generate a local doorbell + * interrupt. Bits that were already set must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set) + return -EINVAL; + + return ntb->ops->db_set(ntb, db_bits); +} + +/** + * ntb_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell register, arming the bits for the next + * doorbell. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear(ntb, db_bits); +} + +/** + * ntb_db_read_mask() - read the local doorbell mask + * @ntb: NTB device context. + * + * Read the local doorbell mask register, and return the bits that are set. + * + * This is unusual, though hardware is likely to support it. + * + * Return: The bits currently set in the local doorbell mask register. + */ +static inline u64 ntb_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->db_read_mask(ntb); +} + +/** + * ntb_db_set_mask() - set bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the local doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_set_mask(ntb, db_bits); +} + +/** + * ntb_db_clear_mask() - clear bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If a doorbell bit is already + * set at the time the mask is cleared, and the corresponding mask bit is + * changed from set to clear, then the ntb driver must ensure that + * ntb_db_event() is called. If the hardware does not generate the interrupt + * on clearing the mask bit, then the driver must call ntb_db_event() anyway. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_addr() - address and size of the peer doorbell register + * @ntb: NTB device context. + * @db_addr: OUT - The address of the peer doorbell register. + * @db_size: OUT - The number of bytes to write the peer doorbell register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * The drivers may wish to ring the peer doorbell at the completion of memory + * copy operations. For efficiency, and to simplify ordering of operations + * between the dma memory copies and the ringing doorbell, the driver may + * append one additional dma memory copy with the doorbell register as the + * destination, after the memory copy operations. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + return ntb->ops->peer_db_addr(ntb, db_addr, db_size); +} + +/** + * ntb_peer_db_read() - read the peer doorbell register + * @ntb: NTB device context. + * + * Read the peer doorbell register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell register. + */ +static inline u64 ntb_peer_db_read(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_db_read) + return 0; + + return ntb->ops->peer_db_read(ntb); +} + +/** + * ntb_peer_db_set() - set bits in the peer doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the peer doorbell register, which may generate a peer doorbell + * interrupt. Bits that were already set must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->peer_db_set(ntb, db_bits); +} + +/** + * ntb_peer_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell register, arming the bits for the next + * doorbell. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear) + return -EINVAL; + + return ntb->ops->peer_db_clear(ntb, db_bits); +} + +/** + * ntb_peer_db_read_mask() - read the peer doorbell mask + * @ntb: NTB device context. + * + * Read the peer doorbell mask register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell mask register. + */ +static inline u64 ntb_peer_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->peer_db_read_mask(ntb); +} + +/** + * ntb_peer_db_set_mask() - set bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the peer doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set_mask) + return -EINVAL; + + return ntb->ops->peer_db_set_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_clear_mask() - clear bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If the hardware does not + * generate the interrupt on clearing the mask bit, then the driver should not + * implement this function! + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear_mask) + return -EINVAL; + + return ntb->ops->peer_db_clear_mask(ntb, db_bits); +} + +/** + * ntb_spad_is_unsafe() - check if it is safe to use the hardware scratchpads + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using scratchpads. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use scratchpads, or One if it is not safe. + */ +static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->spad_is_unsafe) + return 0; + + return ntb->ops->spad_is_unsafe(ntb); +} + +/** + * ntb_mw_count() - get the number of scratchpads + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of scratchpads. + * + * Return: the number of scratchpads. + */ +static inline int ntb_spad_count(struct ntb_dev *ntb) +{ + return ntb->ops->spad_count(ntb); +} + +/** + * ntb_spad_read() - read the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the local scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->spad_read(ntb, idx); +} + +/** + * ntb_spad_write() - write the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the local scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->spad_write(ntb, idx, val); +} + +/** + * ntb_peer_spad_addr() - address of the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @spad_addr: OUT - The address of the peer scratchpad register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); +} + +/** + * ntb_peer_spad_read() - read the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the peer scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->peer_spad_read(ntb, idx); +} + +/** + * ntb_peer_spad_write() - write the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the peer scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->peer_spad_write(ntb, idx, val); +} + +#endif -- cgit v1.2.3 From e26a5843f7f5014ae4460030ca4de029a3ac35d3 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: NTB: Split ntb_hw_intel and ntb_transport drivers Change ntb_hw_intel to use the new NTB hardware abstraction layer. Split ntb_transport into its own driver. Change it to use the new NTB hardware abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb_transport.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index f78b64cc09d5..2862861366a5 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason @@ -48,21 +50,16 @@ struct ntb_transport_qp; -struct ntb_client { +struct ntb_transport_client { struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); + int (*probe)(struct device *client_dev); + void (*remove)(struct device *client_dev); }; -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -int ntb_transport_register_client(struct ntb_client *drvr); -void ntb_transport_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); +int ntb_transport_register_client(struct ntb_transport_client *drvr); +void ntb_transport_unregister_client(struct ntb_transport_client *drvr); +int ntb_transport_register_client_dev(char *device_name); +void ntb_transport_unregister_client_dev(char *device_name); struct ntb_queue_handlers { void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, @@ -75,7 +72,7 @@ struct ntb_queue_handlers { unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, +ntb_transport_create_queue(void *data, struct device *client_dev, const struct ntb_queue_handlers *handlers); void ntb_transport_free_queue(struct ntb_transport_qp *qp); int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, -- cgit v1.2.3 From 0fd972a7d91d6e15393c449492a04d94c0b89351 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:13:42 -0400 Subject: module: relocate module_init from init.h to module.h Modular users will always be users of init functionality, but users of init functionality are not necessarily always modules. Hence any functionality like module_init and module_exit would be more at home in the module.h file. And module.h should explicitly include init.h to make the dependency clear. We've already done all the legwork needed to ensure that this move does not cause any build regressions due to implicit header file include assumptions about where module_init lives. Cc: Rusty Russell Acked-by: Rusty Russell Signed-off-by: Paul Gortmaker --- include/linux/init.h | 78 ---------------------------------------------- include/linux/module.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 7c68c36d3fd8..b449f378f995 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -282,68 +282,8 @@ void __init parse_early_param(void); void __init parse_early_options(char *cmdline); #endif /* __ASSEMBLY__ */ -/** - * module_init() - driver initialization entry point - * @x: function to be run at kernel boot time or module insertion - * - * module_init() will either be called during do_initcalls() (if - * builtin) or at module insertion time (if a module). There can only - * be one per module. - */ -#define module_init(x) __initcall(x); - -/** - * module_exit() - driver exit entry point - * @x: function to be run when driver is removed - * - * module_exit() will wrap the driver clean-up code - * with cleanup_module() when used with rmmod when - * the driver is a module. If the driver is statically - * compiled into the kernel, module_exit() has no effect. - * There can only be one per module. - */ -#define module_exit(x) __exitcall(x); - #else /* MODULE */ -/* - * In most cases loadable modules do not need custom - * initcall levels. There are still some valid cases where - * a driver may be needed early if built in, and does not - * matter when built as a loadable module. Like bus - * snooping debug drivers. - */ -#define early_initcall(fn) module_init(fn) -#define core_initcall(fn) module_init(fn) -#define core_initcall_sync(fn) module_init(fn) -#define postcore_initcall(fn) module_init(fn) -#define postcore_initcall_sync(fn) module_init(fn) -#define arch_initcall(fn) module_init(fn) -#define subsys_initcall(fn) module_init(fn) -#define subsys_initcall_sync(fn) module_init(fn) -#define fs_initcall(fn) module_init(fn) -#define fs_initcall_sync(fn) module_init(fn) -#define rootfs_initcall(fn) module_init(fn) -#define device_initcall(fn) module_init(fn) -#define device_initcall_sync(fn) module_init(fn) -#define late_initcall(fn) module_init(fn) -#define late_initcall_sync(fn) module_init(fn) - -#define console_initcall(fn) module_init(fn) -#define security_initcall(fn) module_init(fn) - -/* Each module must use one module_init(). */ -#define module_init(initfn) \ - static inline initcall_t __inittest(void) \ - { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); - -/* This is only required if you want to be unloadable. */ -#define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ - { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); - #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup(str, func) /* nothing */ #endif @@ -351,24 +291,6 @@ void __init parse_early_options(char *cmdline); /* Data marked not to be saved by software suspend */ #define __nosavedata __section(.data..nosave) -/* This means "can be init if no module support, otherwise module load - may call it." */ -#ifdef CONFIG_MODULES -#define __init_or_module -#define __initdata_or_module -#define __initconst_or_module -#define __INIT_OR_MODULE .text -#define __INITDATA_OR_MODULE .data -#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits -#else -#define __init_or_module __init -#define __initdata_or_module __initdata -#define __initconst_or_module __initconst -#define __INIT_OR_MODULE __INIT -#define __INITDATA_OR_MODULE __INITDATA -#define __INITRODATA_OR_MODULE __INITRODATA -#endif /*CONFIG_MODULES*/ - #ifdef MODULE #define __exit_p(x) x #else diff --git a/include/linux/module.h b/include/linux/module.h index d67b1932cc59..3a19c79918e0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,89 @@ extern struct module_attribute module_uevent; extern int init_module(void); extern void cleanup_module(void); +#ifndef MODULE +/** + * module_init() - driver initialization entry point + * @x: function to be run at kernel boot time or module insertion + * + * module_init() will either be called during do_initcalls() (if + * builtin) or at module insertion time (if a module). There can only + * be one per module. + */ +#define module_init(x) __initcall(x); + +/** + * module_exit() - driver exit entry point + * @x: function to be run when driver is removed + * + * module_exit() will wrap the driver clean-up code + * with cleanup_module() when used with rmmod when + * the driver is a module. If the driver is statically + * compiled into the kernel, module_exit() has no effect. + * There can only be one per module. + */ +#define module_exit(x) __exitcall(x); + +#else /* MODULE */ + +/* + * In most cases loadable modules do not need custom + * initcall levels. There are still some valid cases where + * a driver may be needed early if built in, and does not + * matter when built as a loadable module. Like bus + * snooping debug drivers. + */ +#define early_initcall(fn) module_init(fn) +#define core_initcall(fn) module_init(fn) +#define core_initcall_sync(fn) module_init(fn) +#define postcore_initcall(fn) module_init(fn) +#define postcore_initcall_sync(fn) module_init(fn) +#define arch_initcall(fn) module_init(fn) +#define subsys_initcall(fn) module_init(fn) +#define subsys_initcall_sync(fn) module_init(fn) +#define fs_initcall(fn) module_init(fn) +#define fs_initcall_sync(fn) module_init(fn) +#define rootfs_initcall(fn) module_init(fn) +#define device_initcall(fn) module_init(fn) +#define device_initcall_sync(fn) module_init(fn) +#define late_initcall(fn) module_init(fn) +#define late_initcall_sync(fn) module_init(fn) + +#define console_initcall(fn) module_init(fn) +#define security_initcall(fn) module_init(fn) + +/* Each module must use one module_init(). */ +#define module_init(initfn) \ + static inline initcall_t __inittest(void) \ + { return initfn; } \ + int init_module(void) __attribute__((alias(#initfn))); + +/* This is only required if you want to be unloadable. */ +#define module_exit(exitfn) \ + static inline exitcall_t __exittest(void) \ + { return exitfn; } \ + void cleanup_module(void) __attribute__((alias(#exitfn))); + +#endif + +/* This means "can be init if no module support, otherwise module load + may call it." */ +#ifdef CONFIG_MODULES +#define __init_or_module +#define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits +#else +#define __init_or_module __init +#define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA +#endif /*CONFIG_MODULES*/ + /* Archs provide a method of finding the correct exception table. */ struct exception_table_entry; -- cgit v1.2.3 From 0294112ee3135fbd15eaa70015af8283642dd970 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..fedfd1bea3bf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -309,9 +309,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -507,13 +504,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) -- cgit v1.2.3 From 26095a01d359827eeccec5459c28ddd976183179 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:20 +0200 Subject: ACPI / scan: Add support for ACPI _CLS device matching Device drivers typically use ACPI _HIDs/_CIDs listed in struct device_driver acpi_match_table to match devices. However, for generic drivers, we do not want to list _HID for all supported devices. Also, certain classes of devices do not have _CID (e.g. SATA, USB). Instead, we can leverage ACPI _CLS, which specifies PCI-defined class code (i.e. base-class, subclass and programming interface). This patch adds support for matching ACPI devices using the _CLS method. To support loadable module, current design uses _HID or _CID to match device's modalias. With the new way of matching with _CLS this would requires modification to the current ACPI modalias key to include _CLS. This patch appends PCI-defined class-code to the existing ACPI modalias as following. acpi::::..::: E.g: # cat /sys/devices/platform/AMDI0600:00/modalias acpi:AMDI0600:010601: where bb is th base-class code, ss is te sub-class code, and pp is the programming interface code Since there would not be _HID/_CID in the ACPI matching table of the driver, this patch adds a field to acpi_device_id to specify the matching _CLS. static const struct acpi_device_id ahci_acpi_match[] = { { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; In this case, the corresponded entry in modules.alias file would be: alias acpi*:010601:* ahci_platform Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 14 ++++++++++++++ include/linux/mod_devicetable.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..bdfdb9f65c23 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -446,6 +459,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 -- cgit v1.2.3 From f32dd117051185da6e923b35491a44d7debeeea5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:29:38 +0200 Subject: tick/broadcast: Make idle check independent from mode and config Currently the broadcast busy check, which prevents the idle code from going into deep idle, works only in one shot mode. If NOHZ and HIGHRES are off (config or command line) there is no sanity check at all, so under certain conditions cpus are allowed to go into deep idle, where the local timer stops, and are not woken up again because there is no broadcast timer installed or a hrtimer based broadcast device is not evaluated. Move tick_broadcast_oneshot_control() into the common code and provide proper subfunctions for the various config combinations. The common check in tick_broadcast_oneshot_control() is for the C3STOP misfeature flag of the local clock event device. If its not set, idle can proceed. If set, further checks are necessary. Provide checks for the trivial cases: - If broadcast is disabled in the config, then return busy - If oneshot mode (NOHZ/HIGHES) is disabled in the config, return busy if the broadcast device is hrtimer based. - If oneshot mode is enabled in the config call the original tick_broadcast_oneshot_control() function. That function needs extra checks which will be implemented in seperate patches. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- include/linux/tick.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..6916dcb61857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,11 +67,7 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); -#else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } -#endif static inline void tick_broadcast_enable(void) { -- cgit v1.2.3 From 37b64a42067a04a22468c4e52c12af00d72e462b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 21:56:34 +0200 Subject: tick/broadcast: Unbreak CONFIG_GENERIC_CLOCKEVENTS=n build Making tick_broadcast_oneshot_control() independent from CONFIG_GENERIC_CLOCKEVENTS_BROADCAST broke the build for CONFIG_GENERIC_CLOCKEVENTS=n because the function is not defined there. Provide a proper stub inline. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 6916dcb61857..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,7 +67,14 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} +#endif static inline void tick_broadcast_enable(void) { -- cgit v1.2.3 From a899418167264c7bac574b1a0f1b2c26c5b0995a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:30 +0000 Subject: hotplug: Prevent alloc/free of irq descriptors during cpu up/down When a cpu goes up some architectures (e.g. x86) have to walk the irq space to set up the vector space for the cpu. While this needs extra protection at the architecture level we can avoid a few race conditions by preventing the concurrent allocation/free of irq descriptors and the associated data. When a cpu goes down it moves the interrupts which are targeted to this cpu away by reassigning the affinities. While this happens interrupts can be allocated and freed, which opens a can of race conditions in the code which reassignes the affinities because interrupt descriptors might be freed underneath. Example: CPU1 CPU2 cpu_up/down irq_desc = irq_to_desc(irq); remove_from_radix_tree(desc); raw_spin_lock(&desc->lock); free(desc); We could protect the irq descriptors with RCU, but that would require a full tree change of all accesses to interrupt descriptors. But fortunately these kind of race conditions are rather limited to a few things like cpu hotplug. The normal setup/teardown is very well serialized. So the simpler and obvious solution is: Prevent allocation and freeing of interrupt descriptors accross cpu hotplug. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.063519515@linutronix.de --- include/linux/irqdesc.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif -- cgit v1.2.3 From 2d53809594afaf2ae66a90a3142c1b702fd3bcea Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Mon, 6 Jul 2015 15:57:44 -0700 Subject: Input: zforce_ts - convert to use the gpiod interface Use the new GPIO descriptor interface to handle the zForce GPIOs. This simplifies the code and allows transparently handle GPIO polarity, as specified in device tree data. Also switch to using gpio_{set|get}_value_cansleep() since none of the callers is in atomic context and cansleep variant allows more GPIO controllers to be used with the touchscreen. Signed-off-by: Dirk Behme Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/zforce_ts.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/zforce_ts.h b/include/linux/platform_data/zforce_ts.h index 0472ab2f6ede..7bdece8ef33e 100644 --- a/include/linux/platform_data/zforce_ts.h +++ b/include/linux/platform_data/zforce_ts.h @@ -16,9 +16,6 @@ #define _LINUX_INPUT_ZFORCE_TS_H struct zforce_ts_platdata { - int gpio_int; - int gpio_rst; - unsigned int x_max; unsigned int y_max; }; -- cgit v1.2.3 From 1f6823faa8c563431a94e614d2b63ce16bb6f658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: time: Get rid of do_posix_clock_monotonic_gettime All users gone. Remove it before we get another one. Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* -- cgit v1.2.3 From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, -- cgit v1.2.3 From 7876f930d0e78addc6bbdbba0d6c196a0788d545 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:49 -0400 Subject: blkcg: implement all_blkcgs list Add all_blkcgs list goes through blkcg->all_blkcgs_node and is protected by blkcg_pol_mutex. This will be used to fix blkcg_policy_data allocation bug. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 58cfab80dd70..cf3e7bc22ef3 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -47,6 +47,7 @@ struct blkcg { struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif -- cgit v1.2.3 From 06b285bd11257bccc5a1b85a835507e33656aff2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:50 -0400 Subject: blkcg: fix blkcg_policy_data allocation bug e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") updated per-blkcg policy data to be dynamically allocated. When a policy is registered, its policy data aren't created. Instead, when the policy is activated on a queue, the policy data are allocated if there are blkg's (blkcg_gq's) which are attached to a given blkcg. This is buggy. Consider the following scenario. 1. A blkcg is created. No blkg's attached yet. 2. The policy is registered. No policy data is allocated. 3. The policy is activated on a queue. As the above blkcg doesn't have any blkg's, it won't allocate the matching blkcg_policy_data. 4. An IO is issued from the blkcg and blkg is created and the blkcg still doesn't have the matching policy data allocated. With cfq-iosched, this leads to an oops. It also doesn't free policy data on policy unregistration assuming that freeing of all policy data on blkcg destruction should take care of it; however, this also is incorrect. 1. A blkcg has policy data. 2. The policy gets unregistered but the policy data remains. 3. Another policy gets registered on the same slot. 4. Later, the new policy tries to allocate policy data on the previous blkcg but the slot is already occupied and gets skipped. The policy ends up operating on the policy data of the previous policy. There's no reason to manage blkcg_policy_data lazily. The reason we do lazy allocation of blkg's is that the number of all possible blkg's is the product of cgroups and block devices which can reach a surprising level. blkcg_policy_data is contrained by the number of cgroups and shouldn't be a problem. This patch makes blkcg_policy_data to be allocated for all existing blkcg's on policy registration and freed on unregistration and removes blkcg_policy_data handling from policy [de]activation paths. This makes that blkcg_policy_data are created and removed with the policy they belong to and fixes the above described problems. Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index cf3e7bc22ef3..1b62d768c7df 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -89,18 +89,12 @@ struct blkg_policy_data { * Policies that need to keep per-blkcg data which is independent * from any request_queue associated to it must specify its size * with the cpd_size field of the blkcg_policy structure and - * embed a blkcg_policy_data in it. blkcg core allocates - * policy-specific per-blkcg structures lazily the first time - * they are actually needed, so it handles them together with - * blkgs. cpd_init() is invoked to let each policy handle - * per-blkcg data. + * embed a blkcg_policy_data in it. cpd_init() is invoked to let + * each policy handle per-blkcg data. */ struct blkcg_policy_data { /* the policy id this per-policy data belongs to */ int plid; - - /* used during policy activation */ - struct list_head alloc_node; }; /* association between a blk cgroup and a request queue */ -- cgit v1.2.3 From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c5..1f6526c76ee8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -- cgit v1.2.3 From 5544eb9b81940647b8fad1f251b37cbe2819ce44 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:41:58 +0200 Subject: KVM: count number of assigned devices If there are no assigned devices, the guest PAT are not providing any useful information and can be overridden to writeback; VMX always does this because it has the "IPAT" bit in its extended page table entries, but SVM does not have anything similar. Hook into VFIO and legacy device assignment so that they provide this information to KVM. Reviewed-by: Alex Williamson Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd78c547..05e99b8ef465 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif +#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE +void kvm_arch_start_assignment(struct kvm *kvm); +void kvm_arch_end_assignment(struct kvm *kvm); +bool kvm_arch_has_assigned_device(struct kvm *kvm); +#else +static inline void kvm_arch_start_assignment(struct kvm *kvm) +{ +} + +static inline void kvm_arch_end_assignment(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return false; +} +#endif static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 4200e831e4a8fd09fa4e78de2e571ab270c12d06 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 15:18:24 -0700 Subject: Input: of_touchscreen - switch to using device properties Let's switch form OF to device properties so that common parsing code could work not only on device tree but also on ACPI-based platforms. Reviewed-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- include/linux/input/touchscreen.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h index eecc9ea6cd58..c91e1376132b 100644 --- a/include/linux/input/touchscreen.h +++ b/include/linux/input/touchscreen.h @@ -9,15 +9,8 @@ #ifndef _TOUCHSCREEN_H #define _TOUCHSCREEN_H -#include +struct input_dev; -#ifdef CONFIG_OF -void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch); -#else -static inline void touchscreen_parse_of_params(struct input_dev *dev, - bool multitouch) -{ -} -#endif +void touchscreen_parse_properties(struct input_dev *dev, bool multitouch); #endif -- cgit v1.2.3 From 28a74c050060c17b1edaee2d60470a33be476941 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 11:48:47 -0700 Subject: Input: pixcir_i2c_ts - move platform data Let's move driver's platform data definitions from include/linux/input/ into include/linux/platform_data/ so that it stays with the rest of platform data definitions. Acked-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- include/linux/input/pixcir_ts.h | 64 ----------------------------- include/linux/platform_data/pixcir_i2c_ts.h | 64 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 64 deletions(-) delete mode 100644 include/linux/input/pixcir_ts.h create mode 100644 include/linux/platform_data/pixcir_i2c_ts.h (limited to 'include/linux') diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h deleted file mode 100644 index 7bae83b7c396..000000000000 --- a/include/linux/input/pixcir_ts.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _PIXCIR_I2C_TS_H -#define _PIXCIR_I2C_TS_H - -/* - * Register map - */ -#define PIXCIR_REG_POWER_MODE 51 -#define PIXCIR_REG_INT_MODE 52 - -/* - * Power modes: - * active: max scan speed - * idle: lower scan speed with automatic transition to active on touch - * halt: datasheet says sleep but this is more like halt as the chip - * clocks are cut and it can only be brought out of this mode - * using the RESET pin. - */ -enum pixcir_power_mode { - PIXCIR_POWER_ACTIVE, - PIXCIR_POWER_IDLE, - PIXCIR_POWER_HALT, -}; - -#define PIXCIR_POWER_MODE_MASK 0x03 -#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) - -/* - * Interrupt modes: - * periodical: interrupt is asserted periodicaly - * diff coordinates: interrupt is asserted when coordinates change - * level on touch: interrupt level asserted during touch - * pulse on touch: interrupt pulse asserted druing touch - * - */ -enum pixcir_int_mode { - PIXCIR_INT_PERIODICAL, - PIXCIR_INT_DIFF_COORD, - PIXCIR_INT_LEVEL_TOUCH, - PIXCIR_INT_PULSE_TOUCH, -}; - -#define PIXCIR_INT_MODE_MASK 0x03 -#define PIXCIR_INT_ENABLE (1UL << 3) -#define PIXCIR_INT_POL_HIGH (1UL << 2) - -/** - * struct pixcir_irc_chip_data - chip related data - * @max_fingers: Max number of fingers reported simultaneously by h/w - * @has_hw_ids: Hardware supports finger tracking IDs - * - */ -struct pixcir_i2c_chip_data { - u8 max_fingers; - bool has_hw_ids; -}; - -struct pixcir_ts_platform_data { - int x_max; - int y_max; - int gpio_attb; /* GPIO connected to ATTB line */ - struct pixcir_i2c_chip_data chip; -}; - -#endif diff --git a/include/linux/platform_data/pixcir_i2c_ts.h b/include/linux/platform_data/pixcir_i2c_ts.h new file mode 100644 index 000000000000..7bae83b7c396 --- /dev/null +++ b/include/linux/platform_data/pixcir_i2c_ts.h @@ -0,0 +1,64 @@ +#ifndef _PIXCIR_I2C_TS_H +#define _PIXCIR_I2C_TS_H + +/* + * Register map + */ +#define PIXCIR_REG_POWER_MODE 51 +#define PIXCIR_REG_INT_MODE 52 + +/* + * Power modes: + * active: max scan speed + * idle: lower scan speed with automatic transition to active on touch + * halt: datasheet says sleep but this is more like halt as the chip + * clocks are cut and it can only be brought out of this mode + * using the RESET pin. + */ +enum pixcir_power_mode { + PIXCIR_POWER_ACTIVE, + PIXCIR_POWER_IDLE, + PIXCIR_POWER_HALT, +}; + +#define PIXCIR_POWER_MODE_MASK 0x03 +#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) + +/* + * Interrupt modes: + * periodical: interrupt is asserted periodicaly + * diff coordinates: interrupt is asserted when coordinates change + * level on touch: interrupt level asserted during touch + * pulse on touch: interrupt pulse asserted druing touch + * + */ +enum pixcir_int_mode { + PIXCIR_INT_PERIODICAL, + PIXCIR_INT_DIFF_COORD, + PIXCIR_INT_LEVEL_TOUCH, + PIXCIR_INT_PULSE_TOUCH, +}; + +#define PIXCIR_INT_MODE_MASK 0x03 +#define PIXCIR_INT_ENABLE (1UL << 3) +#define PIXCIR_INT_POL_HIGH (1UL << 2) + +/** + * struct pixcir_irc_chip_data - chip related data + * @max_fingers: Max number of fingers reported simultaneously by h/w + * @has_hw_ids: Hardware supports finger tracking IDs + * + */ +struct pixcir_i2c_chip_data { + u8 max_fingers; + bool has_hw_ids; +}; + +struct pixcir_ts_platform_data { + int x_max; + int y_max; + int gpio_attb; /* GPIO connected to ATTB line */ + struct pixcir_i2c_chip_data chip; +}; + +#endif -- cgit v1.2.3 From cb4a5f068096c0cea954f363e70020aabb3555f4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 11:56:21 -0700 Subject: Input: pixcir_i2c_ts - switch the device over to gpiod This allows uniform parsing on legacy, DT and ACPI systems. Acked-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/pixcir_i2c_ts.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/pixcir_i2c_ts.h b/include/linux/platform_data/pixcir_i2c_ts.h index 7bae83b7c396..646af6f8b838 100644 --- a/include/linux/platform_data/pixcir_i2c_ts.h +++ b/include/linux/platform_data/pixcir_i2c_ts.h @@ -57,7 +57,6 @@ struct pixcir_i2c_chip_data { struct pixcir_ts_platform_data { int x_max; int y_max; - int gpio_attb; /* GPIO connected to ATTB line */ struct pixcir_i2c_chip_data chip; }; -- cgit v1.2.3 From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457a..51bb6532785c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; -- cgit v1.2.3 From 29d01b22eaa18d8b46091d3c98c6001c49f78e4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: locks: new helpers - flock_lock_inode_wait and posix_lock_inode_wait Allow callers to pass in an inode instead of a filp. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- include/linux/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a0653e560c26..4c990edd1377 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1046,11 +1046,13 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1137,6 +1139,12 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) { return -ENOLCK; @@ -1163,6 +1171,12 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) +{ + return -ENOLCK; +} + static inline int flock_lock_file_wait(struct file *filp, struct file_lock *request) { -- cgit v1.2.3 From ee296d7c5709440f8abd36b5b65c6b3e388538d9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: locks: inline posix_lock_file_wait and flock_lock_file_wait They just call file_inode and then the corresponding *_inode_file_wait function. Just make them static inlines instead. Signed-off-by: Jeff Layton --- include/linux/fs.h | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c990edd1377..cc008c338f5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1047,13 +1047,11 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_inode_wait(struct inode *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1145,11 +1143,6 @@ static inline int posix_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1177,12 +1170,6 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1216,6 +1203,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -2025,11 +2026,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; -- cgit v1.2.3 From 30bb6fb39e5c08b9db5bc592d6cbc9a5fc5e67a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 15 Jun 2015 13:31:33 +0200 Subject: gpio: Remove double "base" in comment Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index cc7ec129b329..c8393cd4d44f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -45,7 +45,7 @@ struct seq_file; * @base: identifies the first GPIO number handled by this chip; * or, if negative during registration, requests dynamic ID allocation. * DEPRECATION: providing anything non-negative and nailing the base - * base offset of GPIO chips is deprecated. Please pass -1 as base to + * offset of GPIO chips is deprecated. Please pass -1 as base to * let gpiolib select the chip base in all possible cases. We want to * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO -- cgit v1.2.3 From e1443d2849b146be4ed8d4ef89ae7e215aafaa5b Mon Sep 17 00:00:00 2001 From: Stephen Chandler Paul Date: Wed, 15 Jul 2015 10:20:17 -0700 Subject: Input: i8042 - add unmask_kbd_data option A big problem with the current i8042 debugging option is that it outputs data going to and from the keyboard by default. As a result, many dmesg logs uploaded by users will unintentionally contain sensitive information such as their password, as such it's probably a good idea not to output data coming from the keyboard unless specifically enabled by the user. Signed-off-by: Stephen Chandler Paul Reviewed-by: Andreas Mohr Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index 9f779c7a2da4..df4ab5de1586 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -18,6 +18,8 @@ #include #include +extern struct bus_type serio_bus; + struct serio { void *port_data; -- cgit v1.2.3 From 2531c8cf56a640cd7d17057df8484e570716a450 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 17 Jul 2015 16:23:37 -0700 Subject: mm: hugetlb: allow hugepages_supported to be architecture specific s390 has a constant hugepage size, by setting HPAGE_SHIFT we also change e.g. the pageblock_order, which should be independent in respect to hugepage support. With this patch every architecture is free to define how to check for hugepage support. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Michael Holzheu Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 205026175c42..d891f949466a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -460,15 +460,14 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, return &mm->page_table_lock; } -static inline bool hugepages_supported(void) -{ - /* - * Some platform decide whether they support huge pages at boot - * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when - * there is no such support - */ - return HPAGE_SHIFT != 0; -} +#ifndef hugepages_supported +/* + * Some platform decide whether they support huge pages at boot + * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 + * when there is no such support + */ +#define hugepages_supported() (HPAGE_SHIFT != 0) +#endif #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; -- cgit v1.2.3 From 8db1486065141e619e4855b84e350ef32064f7e1 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 17 Jul 2015 16:23:42 -0700 Subject: include, lib: add __printf attributes to several function prototypes Using __printf attributes helps to detect several format string issues at compile time (even though -Wformat-security is currently disabled in Makefile). For example it can detect when formatting a pointer as a number, like the issue fixed in commit a3fa71c40f18 ("wl18xx: show rx_frames_per_rates as an array as it really is"), or when the arguments do not match the format string, c.f. for example commit 5ce1aca81435 ("reiserfs: fix __RASSERT format string"). To prevent similar bugs in the future, add a __printf attribute to every function prototype which needs one in include/linux/ and lib/. These functions were mostly found by using gcc's -Wsuggest-attribute=format flag. Signed-off-by: Nicolas Iooss Cc: Greg Kroah-Hartman Cc: Felipe Balbi Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clkdev.h | 7 ++++--- include/linux/compat.h | 2 +- include/linux/configfs.h | 3 ++- include/linux/cpu.h | 7 ++++--- include/linux/dcache.h | 3 ++- include/linux/device.h | 15 +++++++-------- include/linux/iommu.h | 2 +- include/linux/kernel.h | 9 +++++---- include/linux/kobject.h | 5 +++-- include/linux/mmiotrace.h | 2 +- include/linux/printk.h | 6 +++--- 11 files changed, 33 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index a240b18e86fa..08bffcc466de 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -33,18 +33,19 @@ struct clk_lookup { } struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); -int clk_register_clkdev(struct clk *, const char *, const char *, ...); +int clk_register_clkdev(struct clk *, const char *, const char *, ...) + __printf(3, 4); int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); #ifdef CONFIG_COMMON_CLK diff --git a/include/linux/compat.h b/include/linux/compat.h index ab25814690bc..a76c9172b2eb 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -424,7 +424,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); -extern int compat_printk(const char *fmt, ...); +extern __printf(1, 2) int compat_printk(const char *fmt, ...); extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat); extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c9e5c57e4edf..63a36e89d0eb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -64,7 +64,8 @@ struct config_item { struct dentry *ci_dentry; }; -extern int config_item_set_name(struct config_item *, const char *, ...); +extern __printf(2, 3) +int config_item_set_name(struct config_item *, const char *, ...); static inline char *config_item_name(struct config_item * item) { diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c0fb6b1b4712..23c30bdcca86 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -40,9 +40,10 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); -extern struct device *cpu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, - const char *fmt, ...); +extern __printf(4, 5) +struct device *cpu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2d50249b7b2..d67ae119cf4e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -327,7 +327,8 @@ static inline unsigned d_count(const struct dentry *dentry) /* * helper function for dentry_operations.d_dname() members */ -extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern __printf(4, 5) +char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); diff --git a/include/linux/device.h b/include/linux/device.h index 5a31bf3a4024..a2b4ea70a946 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -637,8 +637,9 @@ extern int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); -extern char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, - va_list ap); +extern __printf(3, 0) +char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, + va_list ap); extern __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) @@ -1011,12 +1012,10 @@ extern int __must_check device_reprobe(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ -extern struct device *device_create_vargs(struct class *cls, - struct device *parent, - dev_t devt, - void *drvdata, - const char *fmt, - va_list vargs); +extern __printf(5, 0) +struct device *device_create_vargs(struct class *cls, struct device *parent, + dev_t devt, void *drvdata, + const char *fmt, va_list vargs); extern __printf(5, 6) struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index dc767f7c3704..f9c1b6d0f2e4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -258,7 +258,7 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); struct device *iommu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, - const char *fmt, ...); + const char *fmt, ...) __printf(4, 5); void iommu_device_destroy(struct device *dev); int iommu_device_link(struct device *dev, struct device *link); void iommu_device_unlink(struct device *dev, struct device *link); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5f0be58640ea..5582410727cb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -411,7 +411,8 @@ extern __printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); -extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); @@ -679,10 +680,10 @@ do { \ __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ } while (0) -extern int +extern __printf(2, 0) int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); -extern int +extern __printf(2, 0) int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); @@ -702,7 +703,7 @@ int trace_printk(const char *fmt, ...) { return 0; } -static inline int +static __printf(1, 0) inline int ftrace_vprintk(const char *fmt, va_list ap) { return 0; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2d61b909f414..637f67002c5a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -80,8 +80,9 @@ struct kobject { extern __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); -extern int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, - va_list vargs); +extern __printf(2, 0) +int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, + va_list vargs); static inline const char *kobject_name(const struct kobject *kobj) { diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c5d52780d6a0..3ba327af055c 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -106,6 +106,6 @@ extern void enable_mmiotrace(void); extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -extern int mmio_trace_printk(const char *fmt, va_list args); +extern __printf(1, 0) int mmio_trace_printk(const char *fmt, va_list args); #endif /* _LINUX_MMIOTRACE_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 58b1fec40d37..a6298b27ac99 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -122,7 +122,7 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif -typedef int(*printk_func_t)(const char *fmt, va_list args); +typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) @@ -166,7 +166,7 @@ char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); -void dump_stack_set_arch_desc(const char *fmt, ...); +__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); #else @@ -217,7 +217,7 @@ static inline void setup_log_buf(int early) { } -static inline void dump_stack_set_arch_desc(const char *fmt, ...) +static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } -- cgit v1.2.3 From da89947b47a3a355f33a75d7672892c147ed880d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 17 Jul 2015 16:23:50 -0700 Subject: Update Viresh Kumar's email address Switch to my kernel.org alias instead of a badly named gmail address, which I rarely use. Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/amba/sp810.h | 2 +- include/linux/pata_arasan_cf_data.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/sp810.h b/include/linux/amba/sp810.h index c7df89f99115..58fe9e8b6fd7 100644 --- a/include/linux/amba/sp810.h +++ b/include/linux/amba/sp810.h @@ -2,7 +2,7 @@ * ARM PrimeXsys System Controller SP810 header file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h index 3cc21c9cc1e8..9fade5dd2e86 100644 --- a/include/linux/pata_arasan_cf_data.h +++ b/include/linux/pata_arasan_cf_data.h @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller platform data header file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3 From e2cfc91120fa01e3458167054af993fb83d7d0ec Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 17 Jul 2015 16:24:18 -0700 Subject: mm/page_owner: set correct gfp_mask on page_owner Currently, we set wrong gfp_mask to page_owner info in case of isolated freepage by compaction and split page. It causes incorrect mixed pageblock report that we can get from '/proc/pagetypeinfo'. This metric is really useful to measure fragmentation effect so should be accurate. This patch fixes it by setting correct information. Without this patch, after kernel build workload is finished, number of mixed pageblock is 112 among roughly 210 movable pageblocks. But, with this fix, output shows that mixed pageblock is just 57. Signed-off-by: Joonsoo Kim Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index b48c3471c254..cacaabea8a09 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -8,6 +8,7 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); +extern gfp_t __get_page_owner_gfp(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -25,6 +26,14 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } + +static inline gfp_t get_page_owner_gfp(struct page *page) +{ + if (likely(!page_owner_inited)) + return 0; + + return __get_page_owner_gfp(page); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -33,6 +42,10 @@ static inline void set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { } +static inline gfp_t get_page_owner_gfp(struct page *page) +{ + return 0; +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ -- cgit v1.2.3 From 0c8c0f03e3a292e031596484275c14cf39c0ab7a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 17 Jul 2015 12:28:11 +0200 Subject: x86/fpu, sched: Dynamically allocate 'struct fpu' The FPU rewrite removed the dynamic allocations of 'struct fpu'. But, this potentially wastes massive amounts of memory (2k per task on systems that do not have AVX-512 for instance). Instead of having a separate slab, this patch just appends the space that we need to the 'task_struct' which we dynamically allocate already. This saves from doing an extra slab allocation at fork(). The only real downside here is that we have to stick everything and the end of the task_struct. But, I think the BUILD_BUG_ON()s I stuck in there should keep that from being too fragile. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-2-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ae21f1591615..e43a41d892b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1522,8 +1522,6 @@ struct task_struct { /* hung task detection */ unsigned long last_switch_count; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; /* filesystem information */ struct fs_struct *fs; /* open file information */ @@ -1778,8 +1776,18 @@ struct task_struct { unsigned long task_state_change; #endif int pagefault_disabled; +/* CPU-specific state of this task */ + struct thread_struct thread; +/* + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. + * + * Do not put anything below here! + */ }; +extern int arch_task_struct_size(void); + /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) -- cgit v1.2.3 From 5aaeb5c01c5b6c0be7b7aadbf3ace9f3a4458c3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Jul 2015 12:28:12 +0200 Subject: x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86 Don't burden architectures without dynamic task_struct sizing with the overhead of dynamic sizing. Also optimize the x86 code a bit by caching task_struct_size. Acked-and-Tested-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-3-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e43a41d892b6..04b5ada460b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1786,7 +1786,11 @@ struct task_struct { */ }; -extern int arch_task_struct_size(void); +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) -- cgit v1.2.3 From 7f3884f7de89c49439fdaa115f6d1caec3256cc3 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Tue, 4 Aug 2015 16:36:29 -0700 Subject: Input: atmel_mxt_ts - use deep sleep mode when stopped The hardcoded 0x83 CTRL setting overrides other settings in that byte, enabling extra reporting that may not be useful on a particular platform. Implement improved suspend mechanism via deep sleep. By writing zero to both the active and idle cycle times the maXTouch device can be put into a deep sleep mode, using minimal power. It is necessary to issue a calibrate command after the chip has spent any time in deep sleep, however a soft reset is unnecessary. Use the old method on Chromebook Pixel via platform data option. This patch also deals with the situation where the power configuration is zero on probe, which would mean that the device never wakes up to execute commands. After a config download, the T7 power configuration may have changed so it is necessary to re-read it. Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- include/linux/i2c/atmel_mxt_ts.h | 25 ------------------------ include/linux/platform_data/atmel_mxt_ts.h | 31 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 25 deletions(-) delete mode 100644 include/linux/i2c/atmel_mxt_ts.h create mode 100644 include/linux/platform_data/atmel_mxt_ts.h (limited to 'include/linux') diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h deleted file mode 100644 index 02bf6ea31701..000000000000 --- a/include/linux/i2c/atmel_mxt_ts.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Atmel maXTouch Touchscreen driver - * - * Copyright (C) 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __LINUX_ATMEL_MXT_TS_H -#define __LINUX_ATMEL_MXT_TS_H - -#include - -/* The platform data for the Atmel maXTouch touchscreen driver */ -struct mxt_platform_data { - unsigned long irqflags; - u8 t19_num_keys; - const unsigned int *t19_keymap; -}; - -#endif /* __LINUX_ATMEL_MXT_TS_H */ diff --git a/include/linux/platform_data/atmel_mxt_ts.h b/include/linux/platform_data/atmel_mxt_ts.h new file mode 100644 index 000000000000..695035a8d7fb --- /dev/null +++ b/include/linux/platform_data/atmel_mxt_ts.h @@ -0,0 +1,31 @@ +/* + * Atmel maXTouch Touchscreen driver + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H +#define __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H + +#include + +enum mxt_suspend_mode { + MXT_SUSPEND_DEEP_SLEEP = 0, + MXT_SUSPEND_T9_CTRL = 1, +}; + +/* The platform data for the Atmel maXTouch touchscreen driver */ +struct mxt_platform_data { + unsigned long irqflags; + u8 t19_num_keys; + const unsigned int *t19_keymap; + enum mxt_suspend_mode suspend_mode; +}; + +#endif /* __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H */ -- cgit v1.2.3