From ae1ff3d623905947158fd3394854c23026337810 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:28 +0300 Subject: iommu: iova: Move iova cache management to the iova library This is necessary to separate intel-iommu from the iova library. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- include/linux/iova.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); -- cgit v1.2.3 From 17e8351a77397e8a83727eb17e3a3e9b8ab5257a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Jul 2015 08:12:54 +0200 Subject: thermal: consistently use int for temperatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The thermal code uses int, long and unsigned long for temperatures in different places. Using an unsigned type limits the thermal framework to positive temperatures without need. Also several drivers currently will report temperatures near UINT_MAX for temperatures below 0°C. This will probably immediately shut the machine down due to overtemperature if started below 0°C. 'long' is 64bit on several architectures. This is not needed since INT_MAX °mC is above the melting point of all known materials. Consistently use a plain 'int' for temperatures throughout the thermal code and the drivers. This only changes the places in the drivers where the temperature is passed around as pointer, when drivers internally use another type this is not changed. Signed-off-by: Sascha Hauer Acked-by: Geert Uytterhoeven Reviewed-by: Jean Delvare Reviewed-by: Lukasz Majewski Reviewed-by: Darren Hart Reviewed-by: Heiko Stuebner Reviewed-by: Peter Feuerer Cc: Punit Agrawal Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Jean Delvare Cc: Peter Feuerer Cc: Heiko Stuebner Cc: Lukasz Majewski Cc: Stephen Warren Cc: Thierry Reding Cc: linux-acpi@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-omap@vger.kernel.org Cc: linux-samsung-soc@vger.kernel.org Cc: Guenter Roeck Cc: Rafael J. Wysocki Cc: Maxime Ripard Cc: Darren Hart Cc: lm-sensors@lm-sensors.org Signed-off-by: Zhang Rui --- include/linux/thermal.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 037e9df2f610..17292fee8686 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,23 +92,19 @@ struct thermal_zone_device_ops { struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); - int (*get_temp) (struct thermal_zone_device *, unsigned long *); + int (*get_temp) (struct thermal_zone_device *, int *); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*get_trip_type) (struct thermal_zone_device *, int, enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_temp) (struct thermal_zone_device *, int, - unsigned long); - int (*get_trip_hyst) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_hyst) (struct thermal_zone_device *, int, - unsigned long); - int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); - int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); + int (*get_trip_temp) (struct thermal_zone_device *, int, int *); + int (*set_trip_temp) (struct thermal_zone_device *, int, int); + int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); + int (*set_trip_hyst) (struct thermal_zone_device *, int, int); + int (*get_crit_temp) (struct thermal_zone_device *, int *); + int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -332,9 +328,9 @@ struct thermal_genl_event { * temperature. */ struct thermal_zone_of_device_ops { - int (*get_temp)(void *, long *); + int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); - int (*set_emul_temp)(void *, unsigned long); + int (*set_emul_temp)(void *, int); }; /** @@ -406,7 +402,7 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, const struct thermal_cooling_device_ops *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp); +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -457,7 +453,7 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( const char *name) { return ERR_PTR(-ENODEV); } static inline int thermal_zone_get_temp( - struct thermal_zone_device *tz, unsigned long *temp) + struct thermal_zone_device *tz, int *temp) { return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } -- cgit v1.2.3 From 8025e5ddf9c1cac0e632dad49a63abf7848b78cb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Jul 2015 11:55:44 -0300 Subject: [media] mm: Provide new get_vaddr_frames() helper Provide new function get_vaddr_frames(). This function maps virtual addresses from given start and fills given array with page frame numbers of the corresponding pages. If given start belongs to a normal vma, the function grabs reference to each of the pages to pin them in memory. If start belongs to VM_IO | VM_PFNMAP vma, we don't touch page structures. Caller must make sure pfns aren't reused for anything else while he is using them. This function is created for various drivers to simplify handling of their buffers. Signed-off-by: Jan Kara Acked-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Andrew Morton Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/mm.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e872f92dbac..79ad29a8a60a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -20,6 +20,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1198,6 +1199,49 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); + +/* Container for pinned pfns / pages */ +struct frame_vector { + unsigned int nr_allocated; /* Number of frames we have space for */ + unsigned int nr_frames; /* Number of frames stored in ptrs array */ + bool got_ref; /* Did we pin pages by getting page ref? */ + bool is_pfns; /* Does array contain pages or pfns? */ + void *ptrs[0]; /* Array of pinned pfns / pages. Use + * pfns_vector_pages() or pfns_vector_pfns() + * for access */ +}; + +struct frame_vector *frame_vector_create(unsigned int nr_frames); +void frame_vector_destroy(struct frame_vector *vec); +int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, + bool write, bool force, struct frame_vector *vec); +void put_vaddr_frames(struct frame_vector *vec); +int frame_vector_to_pages(struct frame_vector *vec); +void frame_vector_to_pfns(struct frame_vector *vec); + +static inline unsigned int frame_vector_count(struct frame_vector *vec) +{ + return vec->nr_frames; +} + +static inline struct page **frame_vector_pages(struct frame_vector *vec) +{ + if (vec->is_pfns) { + int err = frame_vector_to_pages(vec); + + if (err) + return ERR_PTR(err); + } + return (struct page **)(vec->ptrs); +} + +static inline unsigned long *frame_vector_pfns(struct frame_vector *vec) +{ + if (!vec->is_pfns) + frame_vector_to_pfns(vec); + return (unsigned long *)(vec->ptrs); +} + struct kvec; int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); -- cgit v1.2.3 From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001 From: Vatika Harlalka Date: Tue, 1 Sep 2015 16:50:59 +0200 Subject: nohz: Affine unpinned timers to housekeepers The problem addressed in this patch is about affining unpinned timers. Adaptive or Full Dynticks CPUs are currently disturbed by unnecessary jitter due to firing of such timers on them. This patch will affine timers to online CPUs which are not full dynticks in NOHZ_FULL configured systems. It should not introduce overhead in nohz full off case due to static keys. Signed-off-by: Vatika Harlalka Signed-off-by: Frederic Weisbecker Reviewed-by: Preeti U Murthy Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/tick.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f83f92..e312219ff823 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } -- cgit v1.2.3 From 5e7c4274a70aa2d6f485996d0ca1dad52d0039ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2015 19:28:20 +0300 Subject: block: Check for gaps on front and back merges We are checking for gaps to previous bio_vec, which can only detect back merges gaps. Moreover, at the point where we check for a gap, we don't know if we will attempt a back or a front merge. Thus, check for gap to prev in a back merge attempt and check for a gap to next in a front merge attempt. Signed-off-by: Jens Axboe [sagig: Minor rename change] Signed-off-by: Sagi Grimberg --- include/linux/blkdev.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a622f270f09e..2ff94def041e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1368,6 +1368,26 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); } +static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, + struct bio *next) +{ + if (!bio_has_data(prev)) + return false; + + return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], + next->bi_io_vec[0].bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, bio, req->bio); +} + struct work_struct; int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From e74bfeedad08180b968d8613dcde141ffb0720c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:17 -0400 Subject: NTB: Add flow control to the ntb_netdev Right now if we push the NTB really hard, we start dropping packets due to not able to process the packets fast enough. We need to st:qop the upper layer from flooding us when that happens. A timer is necessary in order to restart the queue once the resource has been processed on the receive side. Due to the way NTB is setup, the resources on the tx side are tied to the processing of the rx side and there's no async way to know when the rx side has released those resources. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- include/linux/ntb_transport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index 2862861366a5..7243eb98a722 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); void ntb_transport_link_up(struct ntb_transport_qp *qp); void ntb_transport_link_down(struct ntb_transport_qp *qp); bool ntb_transport_link_query(struct ntb_transport_qp *qp); +unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp); -- cgit v1.2.3 From a7c23237481782fbea3c2230e362b72863e144b0 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 04:15:28 -0400 Subject: NTB: Fix documentation for ntb_link_is_up There was a copy and paste error in the documentation for ntb_link_is_up. The long description was mistakenly copied from ntb_link_set_trans. This adds the appropriate long description for ntb_link_is_up. Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b02f72bb8e32..e3d3299c6052 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) * @speed: OUT - The link speed expressed as PCIe generation number. * @width: OUT - The link width expressed as the number of PCIe lanes. * - * Set the translation of a memory window. The peer may access local memory - * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. * * Return: One if the link is up, zero if the link is down, otherwise a * negative value indicating the error number. -- cgit v1.2.3 From 86663c91866ae85c219f1a80ef2c9460b7ca5cd8 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Wed, 15 Jul 2015 12:43:21 -0400 Subject: NTB: Fix documentation for ntb_peer_db_clear. The documentation should say "peer" not "local" when referring to the peer doorbell register. Reported-by: Dave Jiang Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index e3d3299c6052..f798e2afba88 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -794,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) } /** - * ntb_peer_db_clear() - clear bits in the local doorbell register + * ntb_peer_db_clear() - clear bits in the peer doorbell register * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * -- cgit v1.2.3 From edcd591c77a48da753456f92daf8bb50fe9bac93 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 7 Sep 2015 13:18:03 -0600 Subject: locking/static_keys: Fix a silly typo Commit: 412758cb2670 ("jump label, locking/static_keys: Update docs") introduced a typo that might as well get fixed. Signed-off-by: Jonathan Corbet Cc: Andrew Morton Cc: Jason Baron Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150907131803.54c027e1@lwn.net Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..0684bd3a48fc 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -22,7 +22,7 @@ * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * static_key_likely() - * statick_key_unlikely() + * static_key_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we -- cgit v1.2.3 From 8b9558aab853e98ba6e3fee0dd8545544966958c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 1 Sep 2015 17:19:38 +0800 Subject: libceph: use keepalive2 to verify the mon session is alive Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 2 ++ include/linux/ceph/messenger.h | 4 ++++ include/linux/ceph/msgr.h | 4 +++- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9ebee53d3bf5..397c5cd09794 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -46,6 +46,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long monc_ping_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -66,6 +67,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_MONC_PING_TIMEOUT_DEFAULT msecs_to_jiffies(30 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 37753278987a..7e1252e97a30 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -248,6 +248,8 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ + struct timespec last_keepalive_ack; + struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ }; @@ -285,6 +287,8 @@ extern void ceph_msg_revoke(struct ceph_msg *msg); extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); extern void ceph_con_keepalive(struct ceph_connection *con); +extern bool ceph_con_keepalive_expired(struct ceph_connection *con, + unsigned long interval); extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, size_t length, size_t alignment); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 1c1887206ffa..0fe2656ac415 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -84,10 +84,12 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_MSG 7 /* message */ #define CEPH_MSGR_TAG_ACK 8 /* message ack */ #define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */ -#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ +#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ +#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ +#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ /* -- cgit v1.2.3 From 4eafbd15b6c84cd3f6c76022c8a6c27f7cc076e1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Sep 2015 18:41:01 +0200 Subject: PM / OPP: add dev_pm_opp_get_suspend_opp() helper Add dev_pm_opp_get_suspend_opp() helper to obtain suspend opp. Acked-by: Viresh Kumar Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cab7ba55bedb..e817722ee3f0 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -34,6 +34,7 @@ bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); +struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, @@ -80,6 +81,11 @@ static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) return 0; } +static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) +{ + return NULL; +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { -- cgit v1.2.3 From 08e75e754a6d9838e490b74551d19fc04d0fd6f9 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Fri, 14 Aug 2015 18:56:56 +0100 Subject: PM / devfreq: cache the last call to get_dev_status() The return value of get_dev_status() can be reused. Cache it so that other parts of the kernel can reuse it instead of having to call the same function again. Cc: Kyungmin Park Signed-off-by: Javi Merino Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index ce447f0f1bad..70a1c60ddda4 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -161,6 +161,7 @@ struct devfreq { struct delayed_work work; unsigned long previous_freq; + struct devfreq_dev_status last_status; void *data; /* private data for governors */ @@ -204,6 +205,15 @@ extern int devm_devfreq_register_opp_notifier(struct device *dev, extern void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); +/** + * devfreq_update_stats() - update the last_status pointer in struct devfreq + * @df: the devfreq instance whose status needs updating + */ +static inline int devfreq_update_stats(struct devfreq *df) +{ + return df->profile->get_dev_status(df->dev.parent, &df->last_status); +} + #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq @@ -289,6 +299,11 @@ static inline void devm_devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq) { } + +static inline int devfreq_update_stats(struct devfreq *df) +{ + return -EINVAL; +} #endif /* CONFIG_PM_DEVFREQ */ #endif /* __LINUX_DEVFREQ_H__ */ -- cgit v1.2.3 From d54cdf3fc91aae3780433471d15d73413a845bc0 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Tue, 18 Aug 2015 13:45:49 +0900 Subject: PM / devfreq: comments for get_dev_status usage updated With the introduction of devfreq_update_stats(), governors are not recommended to use get_dev_status() directly. Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 70a1c60ddda4..68030e22af35 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -65,7 +65,10 @@ struct devfreq_dev_status { * The "flags" parameter's possible values are * explained above with "DEVFREQ_FLAG_*" macros. * @get_dev_status: The device should provide the current performance - * status to devfreq, which is used by governors. + * status to devfreq. Governors are recommended not to + * use this directly. Instead, governors are recommended + * to use devfreq_update_stats() along with + * devfreq.last_status. * @get_cur_freq: The device should provide the current frequency * at which it is operating. * @exit: An optional callback that is called when devfreq @@ -208,6 +211,10 @@ extern void devm_devfreq_unregister_opp_notifier(struct device *dev, /** * devfreq_update_stats() - update the last_status pointer in struct devfreq * @df: the devfreq instance whose status needs updating + * + * Governors are recommended to use this function along with last_status, + * which allows other entities to reuse the last_status without affecting + * the values fetched later by governors. */ static inline int devfreq_update_stats(struct devfreq *df) { -- cgit v1.2.3 From 7f39add3b08cbbdb99abe50e6d7c342e6800d684 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 11 Sep 2015 09:03:04 -0600 Subject: block: Refuse request/bio merges with gaps in the integrity payload If a driver sets the block queue virtual boundary mask, it means that it cannot handle gaps so we must not allow those in the integrity payload as well. Signed-off-by: Sagi Grimberg Fixed up by me to have duplicate integrity merge functions, depending on whether block integrity is enabled or not. Fixes a compilations issue with CONFIG_BLK_DEV_INTEGRITY unset. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2ff94def041e..1aac7316a4b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1514,6 +1514,26 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1580,6 +1600,16 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g) { return 0; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From 5b25b13ab08f616efd566347d809b4ece54570d1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 11 Sep 2015 13:07:39 -0700 Subject: sys_membarrier(): system-wide memory barrier (generic, x86) Here is an implementation of a new system call, sys_membarrier(), which executes a memory barrier on all threads running on the system. It is implemented by calling synchronize_sched(). It can be used to distribute the cost of user-space memory barriers asymmetrically by transforming pairs of memory barriers into pairs consisting of sys_membarrier() and a compiler barrier. For synchronization primitives that distinguish between read-side and write-side (e.g. userspace RCU [1], rwlocks), the read-side can be accelerated significantly by moving the bulk of the memory barrier overhead to the write-side. The existing applications of which I am aware that would be improved by this system call are as follows: * Through Userspace RCU library (http://urcu.so) - DNS server (Knot DNS) https://www.knot-dns.cz/ - Network sniffer (http://netsniff-ng.org/) - Distributed object storage (https://sheepdog.github.io/sheepdog/) - User-space tracing (http://lttng.org) - Network storage system (https://www.gluster.org/) - Virtual routers (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf) - Financial software (https://lkml.org/lkml/2015/3/23/189) Those projects use RCU in userspace to increase read-side speed and scalability compared to locking. Especially in the case of RCU used by libraries, sys_membarrier can speed up the read-side by moving the bulk of the memory barrier cost to synchronize_rcu(). * Direct users of sys_membarrier - core dotnet garbage collector (https://github.com/dotnet/coreclr/issues/198) Microsoft core dotnet GC developers are planning to use the mprotect() side-effect of issuing memory barriers through IPIs as a way to implement Windows FlushProcessWriteBuffers() on Linux. They are referring to sys_membarrier in their github thread, specifically stating that sys_membarrier() is what they are looking for. To explain the benefit of this scheme, let's introduce two example threads: Thread A (non-frequent, e.g. executing liburcu synchronize_rcu()) Thread B (frequent, e.g. executing liburcu rcu_read_lock()/rcu_read_unlock()) In a scheme where all smp_mb() in thread A are ordering memory accesses with respect to smp_mb() present in Thread B, we can change each smp_mb() within Thread A into calls to sys_membarrier() and each smp_mb() within Thread B into compiler barriers "barrier()". Before the change, we had, for each smp_mb() pairs: Thread A Thread B previous mem accesses previous mem accesses smp_mb() smp_mb() following mem accesses following mem accesses After the change, these pairs become: Thread A Thread B prev mem accesses prev mem accesses sys_membarrier() barrier() follow mem accesses follow mem accesses As we can see, there are two possible scenarios: either Thread B memory accesses do not happen concurrently with Thread A accesses (1), or they do (2). 1) Non-concurrent Thread A vs Thread B accesses: Thread A Thread B prev mem accesses sys_membarrier() follow mem accesses prev mem accesses barrier() follow mem accesses In this case, thread B accesses will be weakly ordered. This is OK, because at that point, thread A is not particularly interested in ordering them with respect to its own accesses. 2) Concurrent Thread A vs Thread B accesses Thread A Thread B prev mem accesses prev mem accesses sys_membarrier() barrier() follow mem accesses follow mem accesses In this case, thread B accesses, which are ensured to be in program order thanks to the compiler barrier, will be "upgraded" to full smp_mb() by synchronize_sched(). * Benchmarks On Intel Xeon E5405 (8 cores) (one thread is calling sys_membarrier, the other 7 threads are busy looping) 1000 non-expedited sys_membarrier calls in 33s =3D 33 milliseconds/call. * User-space user of this system call: Userspace RCU library Both the signal-based and the sys_membarrier userspace RCU schemes permit us to remove the memory barrier from the userspace RCU rcu_read_lock() and rcu_read_unlock() primitives, thus significantly accelerating them. These memory barriers are replaced by compiler barriers on the read-side, and all matching memory barriers on the write-side are turned into an invocation of a memory barrier on all active threads in the process. By letting the kernel perform this synchronization rather than dumbly sending a signal to every process threads (as we currently do), we diminish the number of unnecessary wake ups and only issue the memory barriers on active threads. Non-running threads do not need to execute such barrier anyway, because these are implied by the scheduler context switches. Results in liburcu: Operations in 10s, 6 readers, 2 writers: memory barriers in reader: 1701557485 reads, 2202847 writes signal-based scheme: 9830061167 reads, 6700 writes sys_membarrier: 9952759104 reads, 425 writes sys_membarrier (dyn. check): 7970328887 reads, 425 writes The dynamic sys_membarrier availability check adds some overhead to the read-side compared to the signal-based scheme, but besides that, sys_membarrier slightly outperforms the signal-based scheme. However, this non-expedited sys_membarrier implementation has a much slower grace period than signal and memory barrier schemes. Besides diminishing the number of wake-ups, one major advantage of the membarrier system call over the signal-based scheme is that it does not need to reserve a signal. This plays much more nicely with libraries, and with processes injected into for tracing purposes, for which we cannot expect that signals will be unused by the application. An expedited version of this system call can be added later on to speed up the grace period. Its implementation will likely depend on reading the cpu_curr()->mm without holding each CPU's rq lock. This patch adds the system call to x86 and to asm-generic. [1] http://urcu.so membarrier(2) man page: MEMBARRIER(2) Linux Programmer's Manual MEMBARRIER(2) NAME membarrier - issue memory barriers on a set of threads SYNOPSIS #include int membarrier(int cmd, int flags); DESCRIPTION The cmd argument is one of the following: MEMBARRIER_CMD_QUERY Query the set of supported commands. It returns a bitmask of supported commands. MEMBARRIER_CMD_SHARED Execute a memory barrier on all threads running on the system. Upon return from system call, the caller thread is ensured that all running threads have passed through a state where all memory accesses to user-space addresses match program order between entry to and return from the system call (non-running threads are de facto in such a state). This covers threads from all pro=E2=80=90 cesses running on the system. This command returns 0. The flags argument needs to be 0. For future extensions. All memory accesses performed in program order from each targeted thread is guaranteed to be ordered with respect to sys_membarrier(). If we use the semantic "barrier()" to represent a compiler barrier forcing memory accesses to be performed in program order across the barrier, and smp_mb() to represent explicit memory barriers forcing full memory ordering across the barrier, we have the following ordering table for each pair of barrier(), sys_membarrier() and smp_mb(): The pair ordering is detailed as (O: ordered, X: not ordered): barrier() smp_mb() sys_membarrier() barrier() X X O smp_mb() X O O sys_membarrier() O O O RETURN VALUE On success, these system calls return zero. On error, -1 is returned, and errno is set appropriately. For a given command, with flags argument set to 0, this system call is guaranteed to always return the same value until reboot. ERRORS ENOSYS System call is not implemented. EINVAL Invalid arguments. Linux 2015-04-15 MEMBARRIER(2) Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Reviewed-by: Josh Triplett Cc: KOSAKI Motohiro Cc: Steven Rostedt Cc: Nicholas Miell Cc: Ingo Molnar Cc: Alan Cox Cc: Lai Jiangshan Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: David Howells Cc: Pranith Kumar Cc: Michael Kerrisk Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 08001317aee7..a460e2ef2843 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -885,4 +885,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); +asmlinkage long sys_membarrier(int cmd, int flags); + #endif -- cgit v1.2.3 From 6798a8caaf64fa68b9ab2044e070fe4545034e03 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 11 Sep 2015 13:07:48 -0700 Subject: fs/seq_file: convert int seq_vprint/seq_printf/etc... returns to void The seq_ function return values were frequently misused. See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to seq_has_overflowed() and make public") All uses of these return values have been removed, so convert the return types to void. Miscellanea: o Move seq_put_decimal_ and seq_escape prototypes closer the other seq_vprintf prototypes o Reorder seq_putc and seq_puts to return early on overflow o Add argument names to seq_vprintf and seq_printf o Update the seq_escape kernel-doc o Convert a couple of leading spaces to tabs in seq_escape Signed-off-by: Joe Perches Cc: Al Viro Cc: Steven Rostedt Cc: Mark Brown Cc: Stephen Rothwell Cc: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index adeadbd6d7bf..dde00defbaa5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -114,13 +114,18 @@ int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); -int seq_escape(struct seq_file *, const char *, const char *); -int seq_putc(struct seq_file *m, char c); -int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); -__printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); -__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); +__printf(2, 0) +void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); +__printf(2, 3) +void seq_printf(struct seq_file *m, const char *fmt, ...); +void seq_putc(struct seq_file *m, char c); +void seq_puts(struct seq_file *m, const char *s); +void seq_put_decimal_ull(struct seq_file *m, char delimiter, + unsigned long long num); +void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, @@ -138,10 +143,6 @@ int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); -int seq_put_decimal_ull(struct seq_file *m, char delimiter, - unsigned long long num); -int seq_put_decimal_ll(struct seq_file *m, char delimiter, - long long num); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { -- cgit v1.2.3 From 10fbd36e362a0f367e34a7cd876a81295d8fc5ca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 May 2015 15:32:15 -0700 Subject: blk: rq_data_dir() should not return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rq_data_dir() returns either READ or WRITE (0 == READ, 1 == WRITE), not a boolean value. Now, admittedly the "!= 0" doesn't really change the value (0 stays as zero, 1 stays as one), but it's not only redundant, it confuses gcc, and causes gcc to warn about the construct switch (rq_data_dir(req)) { case READ: ... case WRITE: ... that we have in a few drivers. Now, the gcc warning is silly and stupid (it seems to warn not about the switch value having a different type from the case statements, but about _any_ boolean switch value), but in this case the code itself is silly and stupid too, so let's just change it, and get rid of warnings like this: drivers/block/hd.c: In function ‘hd_request’: drivers/block/hd.c:630:11: warning: switch condition has boolean value [-Wswitch-bool] switch (rq_data_dir(req)) { The odd '!= 0' came in when "cmd_flags" got turned into a "u64" in commit 5953316dbf90 ("block: make rq->cmd_flags be 64-bit") and is presumably because the old code (that just did a logical 'and' with 1) would then end up making the type of rq_data_dir() be u64 too. But if we want to retain the old regular integer type, let's just cast the result to 'int' rather than use that rather odd '!= 0'. Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 708923b9b623..38a5ff772a37 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -584,7 +584,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) +#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1)) /* * Driver can handle struct request, if it either has an old style -- cgit v1.2.3 From cd33dc9ac2977ebe30cecbf39d2992190fbac5b4 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 8 Sep 2015 14:51:12 +0100 Subject: thermal: Fix thermal_zone_of_sensor_register to match documentation thermal_zone_of_sensor_register is documented as returning a pointer to either a valid thermal_zone_device on success, or a corresponding ERR_PTR() value. In contrast, the function returns NULL when THERMAL_OF is configured off. Fix this. Signed-off-by: Punit Agrawal Acked-by: Guenter Roeck Cc: Eduardo Valentin Cc: Zhang Rui Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 17292fee8686..0c5518e13584 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -360,7 +360,7 @@ static inline struct thermal_zone_device * thermal_zone_of_sensor_register(struct device *dev, int id, void *data, const struct thermal_zone_of_device_ops *ops) { - return NULL; + return ERR_PTR(-ENODEV); } static inline -- cgit v1.2.3 From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Sep 2015 09:34:26 +0530 Subject: clockevents: Remove unused set_mode() callback All users are migrated to the per-state callbacks, get rid of the unused interface and the core support code. Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Cc: linaro-kernel@lists.linaro.org Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 31ce435981fe..bdcf358dfce2 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -18,15 +18,6 @@ struct clock_event_device; struct module; -/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ -enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED, - CLOCK_EVT_MODE_SHUTDOWN, - CLOCK_EVT_MODE_PERIODIC, - CLOCK_EVT_MODE_ONESHOT, - CLOCK_EVT_MODE_RESUME, -}; - /* * Possible states of a clock event device. * @@ -86,16 +77,14 @@ enum clock_event_state { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. - * @set_state_periodic: switch state to periodic, if !set_mode - * @set_state_oneshot: switch state to oneshot, if !set_mode - * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode - * @set_state_shutdown: switch state to shutdown, if !set_mode - * @tick_resume: resume clkevt device, if !set_mode + * @set_state_periodic: switch state to periodic + * @set_state_oneshot: switch state to oneshot + * @set_state_oneshot_stopped: switch state to oneshot_stopped + * @set_state_shutdown: switch state to shutdown + * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -116,18 +105,10 @@ struct clock_event_device { u64 min_delta_ns; u32 mult; u32 shift; - enum clock_event_mode mode; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; - /* - * State transition callback(s): Only one of the two groups should be - * defined: - * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). - */ - void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); -- cgit v1.2.3 From c973c3bcec3752455c4d7545edd42935cd7942d9 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 14 Sep 2015 14:23:50 +0100 Subject: thermal: Add a function to get the minimum power The thermal core already has a function to get the maximum power of a cooling device: power_actor_get_max_power(). Add a function to get the minimum power of a cooling device. Cc: Zhang Rui Cc: Eduardo Valentin Reviewed-by: Daniel Kurtz Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 0c5518e13584..157d366e761b 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -380,6 +380,8 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) int power_actor_get_max_power(struct thermal_cooling_device *, struct thermal_zone_device *tz, u32 *max_power); +int power_actor_get_min_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *min_power); int power_actor_set_power(struct thermal_cooling_device *, struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, @@ -415,6 +417,10 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 *max_power) { return 0; } +static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *min_power) +{ return -ENODEV; } static inline int power_actor_set_power(struct thermal_cooling_device *cdev, struct thermal_instance *tz, u32 power) { return 0; } -- cgit v1.2.3 From 63cdbc06b357dcb3a7104a421ee4a4550d7fadfd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Sep 2015 17:06:27 +0200 Subject: netfilter: bridge: fix routing of bridge frames with call-iptables=1 We can't re-use the physoutdev storage area. 1. When using NFQUEUE in PREROUTING, we attempt to bump a bogus refcnt since nf_bridge->physoutdev is garbage (ipv4/ipv6 address) 2. for same reason, we crash in physdev match in FORWARD or later if skb is routed instead of bridged. This increases nf_bridge_info to 40 bytes, but we have no other choice. Fixes: 72b1e5e4cac7 ("netfilter: bridge: reduce nf_bridge_info to 32 bytes again") Reported-by: Sander Eikelenboom Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2738d355cdf9..9987af080fa0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -179,6 +179,9 @@ struct nf_bridge_info { u8 bridged_dnat:1; __u16 frag_max_size; struct net_device *physindev; + + /* always valid & non-NULL from FORWARD on, for physdev match */ + struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; @@ -189,9 +192,6 @@ struct nf_bridge_info { * skb is out in neigh layer. */ char neigh_header[8]; - - /* always valid & non-NULL from FORWARD on, for physdev match */ - struct net_device *physoutdev; }; }; #endif -- cgit v1.2.3 From 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 14 Sep 2015 17:11:05 -0600 Subject: locking/static_keys: Fix up the static keys documentation Fix a few small mistakes in the static key documentation and delete an unneeded sentence. Suggested-by: Jason Baron Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150914171105.511e1e21@lwn.net Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0684bd3a48fc..f1094238ab2a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * static_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). -- cgit v1.2.3 From 3829c664b1eec243f2a355829efa40f0f414de8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 13:47:24 +0200 Subject: genirq: Remove stale comment Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 6f8b34066442..72a6b2feb7bf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -151,10 +151,6 @@ struct irq_common_data { * methods, to allow shared chip implementations * @msi_desc: MSI descriptor * @affinity: IRQ affinity on SMP - * - * The fields here need to overlay the ones in irq_desc until we - * cleaned up the direct references and switched everything over to - * irq_data. */ struct irq_data { u32 mask; -- cgit v1.2.3 From 6584d84c3e504c76ad291cc2e381bbeed59798ab Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 1 Sep 2015 10:35:50 +0800 Subject: genirq: Update the comment for generic_handle_irq_desc __do_IRQ() was removed by commit 1c77ff2 "genirq: Remove __do_IRQ", but the comment referring to __do_IRQ() was left. Update the comment for generic_handle_irq_desc(). Signed-off-by: Huang Shijie Cc: jiang.liu@linux.intel.com Cc: peterz@infradead.org Cc: rafael.j.wysocki@intel.com Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Link: http://lkml.kernel.org/r/1441074950-3893-1-git-send-email-shijie.huang@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5acfa26602e1..0593c691d091 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -137,9 +137,7 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) /* * Architectures call this to let the generic IRQ layer - * handle an interrupt. If the descriptor is attached to an - * irqchip-style controller then we call the ->handle_irq() handler, - * and it calls __do_IRQ() if it's attached to an irqtype-style controller. + * handle an interrupt. */ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { -- cgit v1.2.3 From 1f0bd44e937468446d080b98b5669844744c24a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Sep 2015 02:17:49 +0200 Subject: cpufreq: acpi-cpufreq: Use cpufreq_cpu_get_raw() in ->get() cpufreq_cpu_get() called by get_cur_freq_on_cpu() is overkill, because the ->get() callback is always invoked in a context in which all of the conditions checked by cpufreq_cpu_get() are guaranteed to be satisfied. Use cpufreq_cpu_get_raw() instead of it and drop the corresponding cpufreq_cpu_put() from get_cur_freq_on_cpu(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 430efcbea48e..dca22de98d94 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,9 +127,14 @@ struct cpufreq_policy { #define CPUFREQ_SHARED_TYPE_ANY (3) /* Freq can be set from any dependent CPU*/ #ifdef CONFIG_CPU_FREQ +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu); struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu); void cpufreq_cpu_put(struct cpufreq_policy *policy); #else +static inline struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu) +{ + return NULL; +} static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) { return NULL; -- cgit v1.2.3 From e902e14549e04c040fb6e15785efd35f810a223a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:36:04 +0200 Subject: genirq: Remove __irq_set_chip_handler_name_locked() All users converted to irq_set_chip_handler_name_locked() Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 0593c691d091..29741382593c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -184,19 +184,6 @@ static inline void __irq_set_handler_locked(unsigned int irq, desc->handle_irq = handler; } -/* caller has locked the irq_desc and both params are valid */ -static inline void -__irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip, - irq_flow_handler_t handler, const char *name) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - irq_desc_get_irq_data(desc)->chip = chip; - desc->handle_irq = handler; - desc->name = name; -} - /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq -- cgit v1.2.3 From 123236ccacc933daac3b39c5eb1f0011c70d41d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 12:54:23 +0200 Subject: genirq: Remove __irq_set_handler_locked() All users converted to irq_set_handler_locked() Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 29741382593c..dce395cd67de 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -174,16 +174,6 @@ static inline int irq_has_action(unsigned int irq) return irq_desc_has_action(irq_to_desc(irq)); } -/* caller has locked the irq_desc and both params are valid */ -static inline void __irq_set_handler_locked(unsigned int irq, - irq_flow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->handle_irq = handler; -} - /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq -- cgit v1.2.3 From 755d119a6204974b2005a98549a48a75a7f5010b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Sep 2015 14:37:12 +0200 Subject: genirq: Simplify irq_data_to_desc() Avoid the lookup of irq_desc and use the same mechanism for hierarchical and flat irqdomains. Based-on-a-patch-from: Jiang Liu Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dce395cd67de..1fc5304641a1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -98,11 +98,7 @@ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - return irq_to_desc(data->irq); -#else - return container_of(data, struct irq_desc, irq_data); -#endif + return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) -- cgit v1.2.3 From fc5697126aa074c289df5e8baae28e115963023f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Sep 2015 12:33:42 +0200 Subject: genirq: Provide IRQD_FORWARDED_TO_VCPU status flag Provide a irq data flag to mark an irq forwarded to a VCPU along with the accessor functions. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier --- include/linux/irq.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 72a6b2feb7bf..e54ae8295460 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -186,6 +186,7 @@ struct irq_data { * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt * IRQD_WAKEUP_ARMED - Wakeup mode armed + * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -200,6 +201,7 @@ enum { IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), IRQD_WAKEUP_ARMED = (1 << 19), + IRQD_FORWARDED_TO_VCPU = (1 << 20), }; #define __irqd_to_state(d) ((d)->common->state_use_accessors) @@ -278,6 +280,20 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; } +static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; +} /* * Functions for chained handlers which can be enabled/disabled by the -- cgit v1.2.3 From 449e9cae58b06be1293858ec8e5d8cb728238baa Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:16 +0800 Subject: genirq: Move field 'node' from irq_data into irq_common_data NUMA node information is per-irq instead of per-irqchip, so move it into struct irq_common_data. Also use CONFIG_NUMA to guard irq_common_data.node. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433145945-789-8-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index e54ae8295460..ebcc5c6745eb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -129,9 +129,13 @@ struct irq_domain; * struct irq_common_data - per irq data shared by all irqchips * @state_use_accessors: status information for irq chip functions. * Use accessor functions to deal with it + * @node: node index useful for balancing */ struct irq_common_data { unsigned int state_use_accessors; +#ifdef CONFIG_NUMA + unsigned int node; +#endif }; /** @@ -139,7 +143,6 @@ struct irq_common_data { * @mask: precomputed bitmask for accessing the chip registers * @irq: interrupt number * @hwirq: hardware interrupt number, local to the interrupt domain - * @node: node index useful for balancing * @common: point to data shared by all irqchips * @chip: low level interrupt hardware access * @domain: Interrupt translation domain; responsible for mapping @@ -156,7 +159,6 @@ struct irq_data { u32 mask; unsigned int irq; unsigned long hwirq; - unsigned int node; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; @@ -664,9 +666,18 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } -static inline int irq_data_get_node(struct irq_data *d) +static inline int irq_common_data_get_node(struct irq_common_data *d) { +#ifdef CONFIG_NUMA return d->node; +#else + return 0; +#endif +} + +static inline int irq_data_get_node(struct irq_data *d) +{ + return irq_common_data_get_node(d->common); } static inline struct cpumask *irq_get_affinity_mask(int irq) -- cgit v1.2.3 From af7080e040d223b5e7d0a8de28f7cea24ef017c4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:21 +0800 Subject: genirq: Move field 'handler_data' from irq_data into irq_common_data Handler data (handler_data) is per-irq instead of per irqchip, so move it into struct irq_common_data. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-13-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 8 ++++---- include/linux/irqdesc.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index ebcc5c6745eb..516aadbfc072 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -130,12 +130,14 @@ struct irq_domain; * @state_use_accessors: status information for irq chip functions. * Use accessor functions to deal with it * @node: node index useful for balancing + * @handler_data: per-IRQ data for the irq_chip methods */ struct irq_common_data { unsigned int state_use_accessors; #ifdef CONFIG_NUMA unsigned int node; #endif + void *handler_data; }; /** @@ -149,7 +151,6 @@ struct irq_common_data { * between hwirq number and linux irq number. * @parent_data: pointer to parent struct irq_data to support hierarchy * irq_domain - * @handler_data: per-IRQ data for the irq_chip methods * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations * @msi_desc: MSI descriptor @@ -165,7 +166,6 @@ struct irq_data { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data *parent_data; #endif - void *handler_data; void *chip_data; struct msi_desc *msi_desc; cpumask_var_t affinity; @@ -641,12 +641,12 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d) static inline void *irq_get_handler_data(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->handler_data : NULL; + return d ? d->common->handler_data : NULL; } static inline void *irq_data_get_irq_handler_data(struct irq_data *d) { - return d->handler_data; + return d->common->handler_data; } static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1fc5304641a1..c7b3e1cc6d59 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -123,7 +123,7 @@ static inline void *irq_desc_get_chip_data(struct irq_desc *desc) static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { - return desc->irq_data.handler_data; + return desc->irq_common_data.handler_data; } static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) -- cgit v1.2.3 From 9df872faa7e1619e9278bec00ceaed2236533530 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 3 Jun 2015 11:47:50 +0800 Subject: genirq: Move field 'affinity' from irq_data into irq_common_data Irq affinity mask is per-irq instead of per irqchip, so move it into struct irq_common_data. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433303281-27688-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 516aadbfc072..75d50544a18f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -110,8 +110,8 @@ enum { /* * Return value for chip->irq_set_affinity() * - * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity - * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity + * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity + * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping * all descendent irqchips. @@ -131,6 +131,7 @@ struct irq_domain; * Use accessor functions to deal with it * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods + * @affinity: IRQ affinity on SMP */ struct irq_common_data { unsigned int state_use_accessors; @@ -138,6 +139,7 @@ struct irq_common_data { unsigned int node; #endif void *handler_data; + cpumask_var_t affinity; }; /** @@ -154,7 +156,6 @@ struct irq_common_data { * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations * @msi_desc: MSI descriptor - * @affinity: IRQ affinity on SMP */ struct irq_data { u32 mask; @@ -168,7 +169,6 @@ struct irq_data { #endif void *chip_data; struct msi_desc *msi_desc; - cpumask_var_t affinity; }; /* @@ -684,12 +684,12 @@ static inline struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->affinity : NULL; + return d ? d->common->affinity : NULL; } static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { - return d->affinity; + return d->common->affinity; } unsigned int arch_dynirq_lower_bound(unsigned int from); -- cgit v1.2.3 From b237721c5d95082a803c0be686f56d2dd1de995b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:43 +0800 Subject: genirq: Move field 'msi_desc' from irq_data into irq_common_data MSI descriptors are per-irq instead of per irqchip, so move it into struct irq_common_data. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Cc: Marc Zyngier Link: http://lkml.kernel.org/r/1433145945-789-35-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 8 ++++---- include/linux/irqdesc.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 75d50544a18f..4913c32db942 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -132,6 +132,7 @@ struct irq_domain; * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods * @affinity: IRQ affinity on SMP + * @msi_desc: MSI descriptor */ struct irq_common_data { unsigned int state_use_accessors; @@ -139,6 +140,7 @@ struct irq_common_data { unsigned int node; #endif void *handler_data; + struct msi_desc *msi_desc; cpumask_var_t affinity; }; @@ -155,7 +157,6 @@ struct irq_common_data { * irq_domain * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations - * @msi_desc: MSI descriptor */ struct irq_data { u32 mask; @@ -168,7 +169,6 @@ struct irq_data { struct irq_data *parent_data; #endif void *chip_data; - struct msi_desc *msi_desc; }; /* @@ -652,12 +652,12 @@ static inline void *irq_data_get_irq_handler_data(struct irq_data *d) static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->msi_desc : NULL; + return d ? d->common->msi_desc : NULL; } static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) { - return d->msi_desc; + return d->common->msi_desc; } static inline u32 irq_get_trigger_type(unsigned int irq) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c7b3e1cc6d59..fbb4d5afc32b 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -128,7 +128,7 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc) static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) { - return desc->irq_data.msi_desc; + return desc->irq_common_data.msi_desc; } /* -- cgit v1.2.3 From bd0b9ac405e1794d72533c3d487aa65b6b955a0c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 10:42:37 +0200 Subject: genirq: Remove irq argument from irq flow handlers Most interrupt flow handlers do not use the irq argument. Those few which use it can retrieve the irq number from the irq descriptor. Remove the argument. Search and replace was done with coccinelle and some extra helper scripts around it. Thanks to Julia for her help! Signed-off-by: Thomas Gleixner Cc: Julia Lawall Cc: Jiang Liu --- include/linux/irq.h | 16 ++++++++-------- include/linux/irqdesc.h | 4 ++-- include/linux/irqhandler.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 4913c32db942..11bf09288ddb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -475,14 +475,14 @@ static inline int irq_set_parent(int irq, int parent_irq) * Built-in IRQ handlers for various IRQ types, * callable via desc->handle_irq() */ -extern void handle_level_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_level_irq(struct irq_desc *desc); +extern void handle_fasteoi_irq(struct irq_desc *desc); +extern void handle_edge_irq(struct irq_desc *desc); +extern void handle_edge_eoi_irq(struct irq_desc *desc); +extern void handle_simple_irq(struct irq_desc *desc); +extern void handle_percpu_irq(struct irq_desc *desc); +extern void handle_percpu_devid_irq(struct irq_desc *desc); +extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index fbb4d5afc32b..a587a33363c7 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -135,9 +135,9 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) * Architectures call this to let the generic IRQ layer * handle an interrupt. */ -static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) +static inline void generic_handle_irq_desc(struct irq_desc *desc) { - desc->handle_irq(irq, desc); + desc->handle_irq(desc); } int generic_handle_irq(unsigned int irq); diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index 62d543004197..661bed0ed1f3 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ struct irq_desc; struct irq_data; -typedef void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc); +typedef void (*irq_flow_handler_t)(struct irq_desc *desc); typedef void (*irq_preflow_handler_t)(struct irq_data *data); #endif -- cgit v1.2.3 From 0c986253b939cc14c69d4adbe2b4121bdf4aa220 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 11:51:12 -0400 Subject: Revert "sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem" This reverts commit d59cfc09c32a2ae31f1c3bc2983a0cd79afb3f14. d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem") and b5ba75b5fc0e ("cgroup: simplify threadgroup locking") changed how cgroup synchronizes against task fork and exits so that it uses global percpu_rwsem instead of per-process rwsem; unfortunately, the write [un]lock paths of percpu_rwsem always involve synchronize_rcu_expedited() which turned out to be too expensive. Improvements for percpu_rwsem are scheduled to be merged in the coming v4.4-rc1 merge window which alleviates this issue. For now, revert the two commits to restore per-process rwsem. They will be re-applied for the v4.4-rc1 merge window. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Reported-by: Christian Borntraeger Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Paolo Bonzini Cc: stable@vger.kernel.org # v4.2+ --- include/linux/cgroup-defs.h | 27 ++------------------------- include/linux/init_task.h | 8 ++++++++ include/linux/sched.h | 12 ++++++++++++ 3 files changed, 22 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4d8fcf2187dc..8492721b39be 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -473,31 +473,8 @@ struct cgroup_subsys { unsigned int depends_on; }; -extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; - -/** - * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. - */ -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) -{ - percpu_down_read(&cgroup_threadgroup_rwsem); -} - -/** - * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups - * @tsk: target task - * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). - */ -static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) -{ - percpu_up_read(&cgroup_threadgroup_rwsem); -} +void cgroup_threadgroup_change_begin(struct task_struct *tsk); +void cgroup_threadgroup_change_end(struct task_struct *tsk); #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d0b380ee7d67..e38681f4912d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,6 +25,13 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#ifdef CONFIG_CGROUPS +#define INIT_GROUP_RWSEM(sig) \ + .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), +#else +#define INIT_GROUP_RWSEM(sig) +#endif + #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -57,6 +64,7 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ + INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..b7b9501b41af 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,6 +762,18 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * group_rwsem prevents new tasks from entering the threadgroup and + * member tasks from exiting,a more specifically, setting of + * PF_EXITING. fork and exit paths are protected with this rwsem + * using threadgroup_change_begin/end(). Users which require + * threadgroup to remain stable should use threadgroup_[un]lock() + * which also takes care of exec path. Currently, cgroup is the + * only user. + */ + struct rw_semaphore group_rwsem; +#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ -- cgit v1.2.3 From 0243ed44ad4a25dbd2e92ad97e5e12a1a6c72d6c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 15 Sep 2015 04:59:21 -0700 Subject: spi: fix kernel-doc warnings in spi.h Fix the following 'make htmldocs' warnings: .//include/linux/spi/spi.h:71: warning: No description found for parameter 'lock' .//include/linux/spi/spi.h:71: warning: Excess struct/union/enum/typedef member 'clock' description in 'spi_statistics' Signed-off-by: Geliang Tang Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..6b00f18f5e6b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -34,7 +34,7 @@ extern struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers - * @clock: lock protecting this structure + * @lock: lock protecting this structure * * @messages: number of spi-messages handled * @transfers: number of spi_transfers handled -- cgit v1.2.3 From 54552d03023cfd485cedf8d7471d1554139d58aa Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 2 Sep 2015 14:21:29 +0200 Subject: ieee802154: 6lowpan: check on valid 802.15.4 frame This patch adds frame control checks to check if the received frame is something which could contain a 6LoWPAN packet. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 1dc1f4ed4001..db01492814d3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -205,6 +205,31 @@ enum { IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; +/* frame control handling */ +#define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_INTRA_PAN 0x0040 + +#define IEEE802154_FTYPE_DATA 0x0001 + +/* + * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA + * @fc: frame control bytes in little-endian byteorder + */ +static inline int ieee802154_is_data(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE802154_FCTL_FTYPE)) == + cpu_to_le16(IEEE802154_FTYPE_DATA); +} + +/** + * ieee802154_is_intra_pan - check if intra pan id communication + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_intra_pan(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: -- cgit v1.2.3 From 7f61f545657281a3a1b0faf68993165ebdecc51b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 16:01:05 +0300 Subject: libceph: don't access invalid memory in keepalive2 path This struct ceph_timespec ceph_ts; ... con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); wraps ceph_ts into a kvec and adds it to con->out_kvec array, yet ceph_ts becomes invalid on return from prepare_write_keepalive(). As a result, we send out bogus keepalive2 stamps. Fix this by encoding into a ceph_timespec member, similar to how acks are read and written. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng --- include/linux/ceph/messenger.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7e1252e97a30..b2371d9b51fa 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,6 +238,8 @@ struct ceph_connection { bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ /* message in temps */ struct ceph_msg_header in_hdr; @@ -248,7 +250,7 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; + struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ -- cgit v1.2.3 From 335c25858218e76ef47f92ecb9d22e919d36140d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 12:44:22 +0300 Subject: libceph: advertise support for keepalive2 We are the client, but advertise keepalive2 anyway - for consistency, if nothing else. In the future the server might want to know whether its clients support keepalive2. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4763ad64e832..f89b31d45cc8 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v1.2.3 From 0fdea1e8a2853f79d39b8555cc9de16a7e0ab26f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Sep 2015 23:43:17 -0400 Subject: SUNRPC: Ensure that we wait for connections to complete before retrying Commit 718ba5b87343, moved the responsibility for unlocking the socket to xs_tcp_setup_socket, meaning that the socket will be unlocked before we know that it has finished trying to connect. The following patch is based on an initial patch by Russell King to ensure that we delay clearing the XPRT_CONNECTING flag until we either know that we failed to initiate a connection attempt, or the connection attempt itself failed. Fixes: 718ba5b87343 ("SUNRPC: Add helpers to prevent socket create from racing") Reported-by: Russell King Reported-by: Russell King Tested-by: Russell King Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7591788e9fbf..357e44c1a46b 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,7 @@ struct sock_xprt { /* * Connection of transports */ + unsigned long sock_state; struct delayed_work connect_worker; struct sockaddr_storage srcaddr; unsigned short srcport; @@ -76,6 +77,8 @@ struct sock_xprt { */ #define TCP_RPC_REPLY (1UL << 6) +#define XPRT_SOCK_CONNECTING 1U + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3 From 97b59c3a91d5ee4777658ff2136d1fdf13bd23d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:54 -0500 Subject: netfilter: ebtables: Simplify the arguments to ebt_do_table Nearly everything thing of interest to ebt_do_table is already present in nf_hook_state. Simplify ebt_do_table by just passing in the skb, nf_hook_state, and the table. This make the code easier to read and maintenance easier. To support this create an nf_hook_state on the stack in ebt_broute (the only caller without a nf_hook_state already available). This new nf_hook_state adds no new computations to ebt_broute, but does use a few more bytes of stack. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8ca6d6464ea3..2ea517c7c6b9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -111,9 +111,9 @@ struct ebt_table { extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table); +extern unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table); /* Used in the kernel match() functions */ #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) -- cgit v1.2.3 From 6cb8ff3f1a535b1d8eb5ea318932513d08eb3da7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:55 -0500 Subject: inet netfilter: Remove hook from ip6t_do_table, arp_do_table, ipt_do_table The values of ops->hooknum and state->hook are guaraneted to be equal making the hook argument to ip6t_do_table, arp_do_table, and ipt_do_table is unnecessary. Remove the unnecessary hook argument. In the callers use state->hook instead of ops->hooknum for clarity and to reduce the number of cachelines the callers touch. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 1 - include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index c22a7fb8d0df..6f074db2f23d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ extern struct xt_table *arpt_register_table(struct net *net, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 4073510da485..aa598f942c01 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -64,7 +64,6 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b40d2b635778..0f76e5c674f9 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -30,7 +30,6 @@ extern struct xt_table *ip6t_register_table(struct net *net, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From 156c196f6038610770588a708b9e0f7df2ead74a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:58 -0500 Subject: netfilter: x_tables: Pass struct net in xt_action_param As xt_action_param lives on the stack this does not bloat any persistent data structures. This is a first step in making netfilter code that needs to know which network namespace it is executing in simpler. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b006b719183f..c5577410c25d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -13,6 +13,7 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data + * @net network namespace through which the action was invoked * @in: input netdevice * @out: output netdevice * @fragoff: packet is a fragment, this is the data offset @@ -24,7 +25,6 @@ * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems - * Network namespace obtainable using dev_net(in/out) */ struct xt_action_param { union { @@ -34,6 +34,7 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; + struct net *net; const struct net_device *in, *out; int fragoff; unsigned int thoff; -- cgit v1.2.3 From 06198b34a3e09e06d9aecaa3727e0d37206cea77 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:06 -0500 Subject: netfilter: Pass priv instead of nf_hook_ops to netfilter hooks Only pass the void *priv parameter out of the nf_hook_ops. That is all any of the functions are interested now, and by limiting what is passed it becomes simpler to change implementation details. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0b4d4560f33d..987c74cd523c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,7 +80,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->okfn = okfn; } -typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, +typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); -- cgit v1.2.3 From b7f76ea2ef6739ee484a165ffbac98deb855d3d3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 18 Sep 2015 23:41:23 +0200 Subject: security: fix typo in security_task_prctl Signed-off-by: Jann Horn Reviewed-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 79d85ddf8093..2f4c1f7aa7db 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -946,7 +946,7 @@ static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg4, unsigned long arg5) { - return cap_task_prctl(option, arg2, arg3, arg3, arg5); + return cap_task_prctl(option, arg2, arg3, arg4, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) -- cgit v1.2.3 From 66e8c57da6bf6b847a48a5a6fda59512f733ed78 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 25 Aug 2015 20:45:18 +0200 Subject: rcu: Change _wait_rcu_gp() to work around GCC bug 67055 Code like this in inline functions confuses some recent versions of gcc: const int n = const-expr; whatever_t array[n]; For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055#c13 This compiler bug results in the following failure after 114b7fd4b (rcu: Create rcu_sync infrastructure): In file included from include/linux/rcupdate.h:429:0, from include/linux/rcu_sync.h:5, from kernel/rcu/sync.c:1: include/linux/rcutiny.h: In function 'rcu_barrier_sched': include/linux/rcutiny.h:55:20: internal compiler error: Segmentation fault static inline void rcu_barrier_sched(void) This commit therefore eliminates the constant local variable in favor of direct use of the expression. Reported-and-tested-by: Mark Salter Reported-by: Guenter Roeck Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff476515f716..581abf848566 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -230,12 +230,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); #define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - const int __n = ARRAY_SIZE(__crcu_array); \ - struct rcu_synchronize __rs_array[__n]; \ - \ - __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -- cgit v1.2.3 From 2785968cd122b22b289db565b7438f2200984044 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 9 May 2015 17:47:52 +0200 Subject: can: headers: make header files self contained This patch adds the missing #include-s to the dev.h and led.h, so that they can be used without including further header files. Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 ++- include/linux/can/led.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c3a9c8fc60fa..56dcadd83716 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -14,9 +14,10 @@ #define _CAN_DEV_H #include -#include #include #include +#include +#include /* * CAN mode diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 146de4506d21..2746f7c2f87d 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -11,6 +11,7 @@ #include #include +#include enum can_led_event { CAN_LED_EVENT_OPEN, -- cgit v1.2.3 From 0f1c28ae74bb1a34d36fca2db5161611d58b3148 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 18 Sep 2015 11:36:14 -0700 Subject: tcp: usec resolution SYN/ACK RTT Currently SYN/ACK RTT is measured in jiffies. For LAN the SYN/ACK RTT is often measured as 0ms or sometimes 1ms, which would affect RTT estimation and min RTT samping used by some congestion control. This patch improves SYN/ACK RTT to be usec resolution if platform supports it. While the timestamping of SYN/ACK is done in request sock, the RTT measurement is carefully arranged to avoid storing another u64 timestamp in tcp_sock. For regular handshake w/o SYNACK retransmission, the RTT is sampled right after the child socket is created and right before the request sock is released (tcp_check_req() in tcp_minisocks.c) For Fast Open the child socket is already created when SYN/ACK was sent, the RTT is sampled in tcp_rcv_state_process() after processing the final ACK an right before the request socket is released. If the SYN/ACK was retransmistted or SYN-cookie was used, we rely on TCP timestamps to measure the RTT. The sample is taken at the same place in tcp_rcv_state_process() after the timestamp values are validated in tcp_validate_incoming(). Note that we do not store TS echo value in request_sock for SYN-cookies, because the value is already stored in tp->rx_opt used by tcp_ack_update_rtt(). One side benefit is that the RTT measurement now happens before initializing congestion control (of the passive side). Therefore the congestion control can use the SYN/ACK RTT. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 937b97893d5f..fcb573be75d9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -112,11 +112,11 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; + struct skb_mstamp snt_synack; /* first SYNACK sent time */ bool tfo_listener; u32 txhash; u32 rcv_isn; u32 snt_isn; - u32 snt_synack; /* synack sent time */ u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# -- cgit v1.2.3 From 87a93e4eceb495f93e3f37b100334d2641765b6c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 18 Sep 2015 11:30:43 +0200 Subject: ieee802154: change needed headroom/tailroom This patch cleanups needed_headroom, needed_tailroom and hard_header_len fields for wpan and lowpan interfaces. For wpan interfaces the worst case mac header len should be part of needed_headroom, currently this is set as hard_header_len, but hard_header_len should be set to the minimum header length which xmit call assumes and this is the minimum frame length of 802.15.4. The hard_header_len value will check inside send callbacl of AF_PACKET raw sockets. For lowpan interfaces, if fragmentation isn't needed the skb will call dev_hard_header for 802154 layer and queue it afterwards. This happens without new skb allocation, so we need the same headroom and tailroom lengths like 802154 inside 802154 6lowpan layer. At least we assume as minimum header length an ipv6 header size. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index db01492814d3..205ce4e1ac32 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,17 @@ #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 +#define IEEE802154_MAX_AUTH_TAG_LEN 16 + +/* General MAC frame format: + * 2 bytes: Frame Control + * 1 byte: Sequence Number + * 20 bytes: Addressing fields + * 14 bytes: Auxiliary Security Header + */ +#define IEEE802154_MAX_HEADER_LEN (2 + 1 + 20 + 14) +#define IEEE802154_MIN_HEADER_LEN (IEEE802154_ACK_PSDU_LEN - \ + IEEE802154_FCS_LEN) #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff -- cgit v1.2.3 From 79750ac4257763ff595a8b2cdc7ba580f0b0c8e0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 21 Sep 2015 11:24:33 +0200 Subject: ieee802154: add helpers for frame control checks This patch introduce two static inline functions. The first to get the frame control field from an sk_buff. The second is for checking on the acknowledgment request bit on the frame control field. Later we can introduce more functions to check on the frame control fields. These will deprecate the current behaviour which requires a host-byteorder conversion and manually bit handling. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 205ce4e1ac32..aca228b81464 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,6 +25,8 @@ #include #include +#include +#include #include #define IEEE802154_MTU 127 @@ -218,6 +220,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -232,6 +235,15 @@ static inline int ieee802154_is_data(__le16 fc) cpu_to_le16(IEEE802154_FTYPE_DATA); } +/** + * ieee802154_is_ackreq - check if acknowledgment request bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_ackreq(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_ACKREQ); +} + /** * ieee802154_is_intra_pan - check if intra pan id communication * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From e3abc8ff0fc18b3925fd5d5c5fbd1613856f4e7c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 16 Aug 2015 11:13:22 +0300 Subject: mac80211: allow to transmit A-MSDU within A-MPDU Advertise the capability to send A-MSDU within A-MPDU in the AddBA request sent by mac80211. Let the driver know about the peer's capabilities. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cfa906f28b7a..19eb9ecd6cf3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1379,6 +1379,7 @@ struct ieee80211_ht_operation { /* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_AMSDU_MASK 0x0001 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0 -- cgit v1.2.3 From 0edd5faeb07bfd3ec5402f9467e4c169dcd131e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Aug 2015 14:31:48 +0200 Subject: wireless: mark element IDs 8 and 9 reserved These were never used in the tree, and are marked as reserved in the IEEE 802.11 documentation (ANA). Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 19eb9ecd6cf3..f79a02a69d26 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1746,8 +1746,7 @@ enum ieee80211_eid { WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, WLAN_EID_COUNTRY = 7, - WLAN_EID_HP_PARAMS = 8, - WLAN_EID_HP_TABLE = 9, + /* 8, 9 reserved */ WLAN_EID_REQUEST = 10, WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, -- cgit v1.2.3 From ac5be6b47e8bd25b62bed2c82cda7398999f59e9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 22 Sep 2015 14:58:49 -0700 Subject: userfaultfd: revert "userfaultfd: waitqueue: add nr wake parameter to __wake_up_locked_key" This reverts commit 51360155eccb907ff8635bd10fc7de876408c2e0 and adapts fs/userfaultfd.c to use the old version of that function. It didn't look robust to call __wake_up_common with "nr == 1" when we absolutely require wakeall semantics, but we've full control of what we insert in the two waitqueue heads of the blocked userfaults. No exclusive waitqueue risks to be inserted into those two waitqueue heads so we can as well stick to "nr == 1" of the old code and we can rely purely on the fact no waitqueue inserted in one of the two waitqueue heads we must enforce as wakeall, has wait->flags WQ_FLAG_EXCLUSIVE set. Signed-off-by: Andrea Arcangeli Cc: Dr. David Alan Gilbert Cc: Michael Ellerman Cc: Shuah Khan Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index d3d077228d4c..1e1bf9f963a9 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -147,8 +147,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) typedef int wait_bit_action_f(struct wait_bit_key *); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); -void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, - void *key); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); @@ -180,7 +179,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, (void *) (m)) #define wake_up_locked_poll(x, m) \ - __wake_up_locked_key((x), TASK_NORMAL, 1, (void *) (m)) + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) #define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) #define wake_up_interruptible_sync_poll(x, m) \ -- cgit v1.2.3 From cb334648a10c7fa6f0f163c22602f4dc1c6d56b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:47 -0700 Subject: arcnet: Use normal kernel spacing style Standardized spacing is easier to read. git diff -w shows no differences. objdiff shows no differences. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 90 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index df0356220730..ccfd1d2f984b 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -51,7 +51,7 @@ /* * Debugging bitflags: each option can be enabled individually. - * + * * Note: only debug flags included in the ARCNET_DEBUG_MAX define will * actually be available. GCC will (at least, GCC 2.7.0 will) notice * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize @@ -77,33 +77,33 @@ #endif #ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL|D_EXTRA) +#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) #endif extern int arcnet_debug; /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX)&arcnet_debug&(x)) -#define BUGMSG2(x,msg,args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x,msg,args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x==D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name , ## args) +#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) +#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) +#define BUGMSG(x, msg, args...) \ + BUGMSG2(x, "%s%6s: " msg, \ + x == D_NORMAL ? KERN_WARNING \ + : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ## args) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n",\ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1));\ - } \ - else { \ - call;\ +#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ + unsigned long _x, _y; \ + _x = get_cycles(); \ + call; \ + _y = get_cycles(); \ + BUGMSG(D_TIMING, \ + "%s: %d bytes in %lu cycles == " \ + "%lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ + } \ + else { \ + call; \ } @@ -189,16 +189,16 @@ struct ArcProto { int mtu; /* largest possible packet */ int is_ip; /* This is a ip plugin - not a raw thing */ - void (*rx) (struct net_device * dev, int bufnum, - struct archdr * pkthdr, int length); - int (*build_header) (struct sk_buff * skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); + void (*rx)(struct net_device *dev, int bufnum, + struct archdr *pkthdr, int length); + int (*build_header)(struct sk_buff *skb, struct net_device *dev, + unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx) (struct net_device * dev, struct archdr * pkt, int length, - int bufnum); - int (*continue_tx) (struct net_device * dev, int bufnum); - int (*ack_tx) (struct net_device * dev, int acked); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, + int bufnum); + int (*continue_tx)(struct net_device *dev, int bufnum); + int (*ack_tx)(struct net_device *dev, int acked); }; extern struct ArcProto *arc_proto_map[256], *arc_proto_default, @@ -263,13 +263,13 @@ struct arcnet_local { * situations in which we (for example) want to pre-load a transmit * buffer, or start receiving while we copy a received packet to * memory. - * + * * The rules: only the interrupt handler is allowed to _add_ buffers to * the queue; thus, this doesn't require a lock. Both the interrupt * handler and the transmit function will want to _remove_ buffers, so * we need to handle the situation where they try to do it at the same * time. - * + * * If next_buf == first_free_buf, the queue is empty. Since there are * only four possible buffers, the queue should never be full. */ @@ -298,17 +298,17 @@ struct arcnet_local { /* hardware-specific functions */ struct { struct module *owner; - void (*command) (struct net_device * dev, int cmd); - int (*status) (struct net_device * dev); - void (*intmask) (struct net_device * dev, int mask); - int (*reset) (struct net_device * dev, int really_reset); - void (*open) (struct net_device * dev); - void (*close) (struct net_device * dev); - - void (*copy_to_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); + void (*command)(struct net_device *dev, int cmd); + int (*status)(struct net_device *dev); + void (*intmask)(struct net_device *dev, int mask); + int (*reset)(struct net_device *dev, int really_reset); + void (*open)(struct net_device *dev); + void (*close)(struct net_device *dev); + + void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ @@ -325,7 +325,7 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev,skb,desc) ; +#define arcnet_dump_skb(dev, skb, desc) ; #endif void arcnet_unregister_proto(struct ArcProto *proto); @@ -335,7 +335,7 @@ struct net_device *alloc_arcdev(const char *name); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); + struct net_device *dev); void arcnet_timeout(struct net_device *dev); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 01a1d5ac4e1a5890fd6c0d0ae900e1b6e4f851d6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:48 -0700 Subject: arcnet: Add and remove blank lines Use a more current kernel line style. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ccfd1d2f984b..78687885eb81 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -34,7 +34,6 @@ */ #define RECON_THRESHOLD 30 - /* * Define this to the minimum "timeout" value. If a transmit takes longer * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large @@ -44,11 +43,9 @@ */ #define TX_TIMEOUT (HZ * 200 / 1000) - /* Display warnings about the driver being an ALPHA version. */ #undef ALPHA_WARNING - /* * Debugging bitflags: each option can be enabled individually. * @@ -106,7 +103,6 @@ extern int arcnet_debug; call; \ } - /* * Time needed to reset the card - in ms (milliseconds). This works on my * SMC PC100. I can't find a reference that tells me just how long I @@ -182,7 +178,6 @@ extern int arcnet_debug; #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ - /* information needed to define an encapsulation driver */ struct ArcProto { char suffix; /* a for RFC1201, e for ether-encap, etc. */ @@ -204,7 +199,6 @@ struct ArcProto { extern struct ArcProto *arc_proto_map[256], *arc_proto_default, *arc_bcast_proto, *arc_raw_proto; - /* * "Incoming" is information needed for each address that could be sending * to us. Mostly for partially-received split packets. @@ -216,7 +210,6 @@ struct Incoming { numpackets; /* number of packets in split */ }; - /* only needed for RFC1201 */ struct Outgoing { struct ArcProto *proto; /* protocol driver that owns this: @@ -230,7 +223,6 @@ struct Outgoing { numsegs; /* number of segments */ }; - struct arcnet_local { uint8_t config, /* current value of CONFIG register */ timeout, /* Extended timeout for COM20020 */ @@ -251,7 +243,6 @@ struct arcnet_local { char *card_name; /* card ident string */ int card_flags; /* special card features */ - /* On preemtive and SMB a lock is needed */ spinlock_t lock; @@ -314,14 +305,11 @@ struct arcnet_local { void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ }; - #define ARCRESET(x) (lp->hw.reset(dev, (x))) #define ACOMMAND(x) (lp->hw.command(dev, (x))) #define ASTATUS() (lp->hw.status(dev)) #define AINTMASK(x) (lp->hw.intmask(dev, (x))) - - #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -- cgit v1.2.3 From d77510f3436e0db9b5e72fa8159ce26c3ac88d2d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:53 -0700 Subject: arcnet: Neaten BUGMSG macro defines These macros are actually printk and pr_cont uses with a flag. Add a new BUGLVL_TEST macro which is just the "should use" test and not an odd "if ()" macro to simplify uses in a new patch. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 78687885eb81..ad610208fbba 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,14 +78,24 @@ #endif extern int arcnet_debug; +#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) +#define BUGLVL(x) if (BUGLVL_TEST(x)) + /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) -#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x, msg, args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x == D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ## args) +#define BUGMSG(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk("%s%6s: " fmt, \ + (x) == D_NORMAL ? KERN_WARNING : \ + (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ##__VA_ARGS__); \ +} while (0) + +#define BUGMSG2(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) /* see how long a function call takes to run, expressed in CPU cycles */ #define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ -- cgit v1.2.3 From 72aeea4841c037b9b3abf65859673cbd7b6664a9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:54 -0700 Subject: arcnet: Expand odd BUGLVL macro with if and uses Don't hide what should be obvious. Make the macro a simple test instead of using if and test. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ad610208fbba..f07c66383b88 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,13 +78,12 @@ #endif extern int arcnet_debug; -#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) -#define BUGLVL(x) if (BUGLVL_TEST(x)) +#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ #define BUGMSG(x, fmt, ...) \ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk("%s%6s: " fmt, \ (x) == D_NORMAL ? KERN_WARNING : \ (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ @@ -93,12 +92,14 @@ do { \ #define BUGMSG2(x, fmt, ...) \ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ +#define TIME(name, bytes, call) \ +do { \ + if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ @@ -108,10 +109,10 @@ do { \ "%lu Kbytes/100Mcycle\n", \ name, bytes, _y - _x, \ 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } \ - else { \ + } else { \ call; \ - } + } \ +} while (0) /* * Time needed to reset the card - in ms (milliseconds). This works on my -- cgit v1.2.3 From a34c0932c3b2f28542825ffc5280d562c49ad42d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:55 -0700 Subject: arcnet: Convert BUGMSG and BUGMSG2 to arc_prink and arc_cont These macros don't actually represent BUG uses but are more commonly used as logging macros, so use a more kernel style macro. Convert the BUGMSG from a netdev_ like use to actually use netdev_. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index f07c66383b88..a678027ff6c2 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -81,34 +81,36 @@ extern int arcnet_debug; #define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ -#define BUGMSG(x, fmt, ...) \ +#define arc_printk(x, dev, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk("%s%6s: " fmt, \ - (x) == D_NORMAL ? KERN_WARNING : \ - (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ##__VA_ARGS__); \ + if (BUGLVL(x)) { \ + if ((x) == D_NORMAL) \ + netdev_warn(dev, fmt, ##__VA_ARGS__); \ + else if ((x) < D_DURING) \ + netdev_info(dev, fmt, ##__VA_ARGS__); \ + else \ + netdev_dbg(dev, fmt, ##__VA_ARGS__); \ + } \ } while (0) -#define BUGMSG2(x, fmt, ...) \ +#define arc_cont(x, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk(fmt, ##__VA_ARGS__); \ + if (BUGLVL(x)) \ + pr_cont(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) \ +#define TIME(dev, name, bytes, call) \ do { \ if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ + arc_printk(D_TIMING, dev, \ + "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ } else { \ call; \ } \ -- cgit v1.2.3 From 83df99b50f901cb7c72cf132a83f43bbaeb01362 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:01 -0700 Subject: arcnet: Convert arcnet_dump_skb macro to static inline Make sure the arguments are tested appropriately when not using this function. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index a678027ff6c2..1d8e36e13616 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -326,7 +326,10 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev, skb, desc) ; +static inline +void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) +{ +} #endif void arcnet_unregister_proto(struct ArcProto *proto); -- cgit v1.2.3 From d6d7d3ed56e3bfe7fd34108dbe23f0610e3d8621 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:02 -0700 Subject: arcnet: Wrap some long lines Just neatening. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 1d8e36e13616..9ca135d0f114 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -203,8 +203,8 @@ struct ArcProto { unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, - int bufnum); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, + int length, int bufnum); int (*continue_tx)(struct net_device *dev, int bufnum); int (*ack_tx)(struct net_device *dev, int acked); }; @@ -309,10 +309,10 @@ struct arcnet_local { void (*open)(struct net_device *dev); void (*close)(struct net_device *dev); - void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); + void (*copy_to_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -- cgit v1.2.3 From 26c6d281688e8bb8154fa78c60e551d024f5d0b8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:03 -0700 Subject: arcnet: Move files out of include/linux These #include files don't need to be in the include/linux directory as they can be local to drivers/net/arcnet/ Move them and update the #include statements. Update the MAINTAINERS file pattern by deleting arcdevice from the NETWORKING block as arcnet is currently unmaintained. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 346 ---------------------------------------------- include/linux/com20020.h | 145 ------------------- 2 files changed, 491 deletions(-) delete mode 100644 include/linux/arcdevice.h delete mode 100644 include/linux/com20020.h (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h deleted file mode 100644 index 9ca135d0f114..000000000000 --- a/include/linux/arcdevice.h +++ /dev/null @@ -1,346 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. NET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions used by the ARCnet driver. - * - * Authors: Avery Pennarun and David Woodhouse - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ -#ifndef _LINUX_ARCDEVICE_H -#define _LINUX_ARCDEVICE_H - -#include -#include - -#ifdef __KERNEL__ -#include - -/* - * RECON_THRESHOLD is the maximum number of RECON messages to receive - * within one minute before printing a "cabling problem" warning. The - * default value should be fine. - * - * After that, a "cabling restored" message will be printed on the next IRQ - * if no RECON messages have been received for 10 seconds. - * - * Do not define RECON_THRESHOLD at all if you want to disable this feature. - */ -#define RECON_THRESHOLD 30 - -/* - * Define this to the minimum "timeout" value. If a transmit takes longer - * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large - * network, or one with heavy network traffic, this timeout may need to be - * increased. The larger it is, though, the longer it will be between - * necessary transmits - don't set this too high. - */ -#define TX_TIMEOUT (HZ * 200 / 1000) - -/* Display warnings about the driver being an ALPHA version. */ -#undef ALPHA_WARNING - -/* - * Debugging bitflags: each option can be enabled individually. - * - * Note: only debug flags included in the ARCNET_DEBUG_MAX define will - * actually be available. GCC will (at least, GCC 2.7.0 will) notice - * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize - * them out. - */ -#define D_NORMAL 1 /* important operational info */ -#define D_EXTRA 2 /* useful, but non-vital information */ -#define D_INIT 4 /* show init/probe messages */ -#define D_INIT_REASONS 8 /* show reasons for discarding probes */ -#define D_RECON 32 /* print a message whenever token is lost */ -#define D_PROTO 64 /* debug auto-protocol support */ -/* debug levels below give LOTS of output during normal operation! */ -#define D_DURING 128 /* trace operations (including irq's) */ -#define D_TX 256 /* show tx packets */ -#define D_RX 512 /* show rx packets */ -#define D_SKB 1024 /* show skb's */ -#define D_SKB_SIZE 2048 /* show skb sizes */ -#define D_TIMING 4096 /* show time needed to copy buffers to card */ -#define D_DEBUG 8192 /* Very detailed debug line for line */ - -#ifndef ARCNET_DEBUG_MAX -#define ARCNET_DEBUG_MAX (127) /* change to ~0 if you want detailed debugging */ -#endif - -#ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) -#endif -extern int arcnet_debug; - -#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) - -/* macros to simplify debug checking */ -#define arc_printk(x, dev, fmt, ...) \ -do { \ - if (BUGLVL(x)) { \ - if ((x) == D_NORMAL) \ - netdev_warn(dev, fmt, ##__VA_ARGS__); \ - else if ((x) < D_DURING) \ - netdev_info(dev, fmt, ##__VA_ARGS__); \ - else \ - netdev_dbg(dev, fmt, ##__VA_ARGS__); \ - } \ -} while (0) - -#define arc_cont(x, fmt, ...) \ -do { \ - if (BUGLVL(x)) \ - pr_cont(fmt, ##__VA_ARGS__); \ -} while (0) - -/* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(dev, name, bytes, call) \ -do { \ - if (BUGLVL(D_TIMING)) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - arc_printk(D_TIMING, dev, \ - "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } else { \ - call; \ - } \ -} while (0) - -/* - * Time needed to reset the card - in ms (milliseconds). This works on my - * SMC PC100. I can't find a reference that tells me just how long I - * should wait. - */ -#define RESETtime (300) - -/* - * These are the max/min lengths of packet payload, not including the - * arc_hardware header, but definitely including the soft header. - * - * Note: packet sizes 254, 255, 256 are impossible because of the way - * ARCnet registers work That's why RFC1201 defines "exception" packets. - * In non-RFC1201 protocols, we have to just tack some extra bytes on the - * end. - */ -#define MTU 253 /* normal packet max size */ -#define MinTU 257 /* extended packet min size */ -#define XMTU 508 /* extended packet max size */ - -/* status/interrupt mask bit fields */ -#define TXFREEflag 0x01 /* transmitter available */ -#define TXACKflag 0x02 /* transmitted msg. ackd */ -#define RECONflag 0x04 /* network reconfigured */ -#define TESTflag 0x08 /* test flag */ -#define EXCNAKflag 0x08 /* excesive nak flag */ -#define RESETflag 0x10 /* power-on-reset */ -#define RES1flag 0x20 /* reserved - usually set by jumper */ -#define RES2flag 0x40 /* reserved - usually set by jumper */ -#define NORXflag 0x80 /* receiver inhibited */ - -/* Flags used for IO-mapped memory operations */ -#define AUTOINCflag 0x40 /* Increase location with each access */ -#define IOMAPflag 0x02 /* (for 90xx) Use IO mapped memory, not mmap */ -#define ENABLE16flag 0x80 /* (for 90xx) Enable 16-bit mode */ - -/* in the command register, the following bits have these meanings: - * 0-2 command - * 3-4 page number (for enable rcv/xmt command) - * 7 receive broadcasts - */ -#define NOTXcmd 0x01 /* disable transmitter */ -#define NORXcmd 0x02 /* disable receiver */ -#define TXcmd 0x03 /* enable transmitter */ -#define RXcmd 0x04 /* enable receiver */ -#define CONFIGcmd 0x05 /* define configuration */ -#define CFLAGScmd 0x06 /* clear flags */ -#define TESTcmd 0x07 /* load test flags */ - -/* flags for "clear flags" command */ -#define RESETclear 0x08 /* power-on-reset */ -#define CONFIGclear 0x10 /* system reconfigured */ - -#define EXCNAKclear 0x0E /* Clear and acknowledge the excive nak bit */ - -/* flags for "load test flags" command */ -#define TESTload 0x08 /* test flag (diagnostic) */ - -/* byte deposited into first address of buffers on reset */ -#define TESTvalue 0321 /* that's octal for 0xD1 :) */ - -/* for "enable receiver" command */ -#define RXbcasts 0x80 /* receive broadcasts */ - -/* flags for "define configuration" command */ -#define NORMALconf 0x00 /* 1-249 byte packets */ -#define EXTconf 0x08 /* 250-504 byte packets */ - -/* card feature flags, set during auto-detection. - * (currently only used by com20020pci) - */ -#define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ -#define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, - but default is 2.5MBit. */ - -/* information needed to define an encapsulation driver */ -struct ArcProto { - char suffix; /* a for RFC1201, e for ether-encap, etc. */ - int mtu; /* largest possible packet */ - int is_ip; /* This is a ip plugin - not a raw thing */ - - void (*rx)(struct net_device *dev, int bufnum, - struct archdr *pkthdr, int length); - int (*build_header)(struct sk_buff *skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); - - /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, - int length, int bufnum); - int (*continue_tx)(struct net_device *dev, int bufnum); - int (*ack_tx)(struct net_device *dev, int acked); -}; - -extern struct ArcProto *arc_proto_map[256], *arc_proto_default, - *arc_bcast_proto, *arc_raw_proto; - -/* - * "Incoming" is information needed for each address that could be sending - * to us. Mostly for partially-received split packets. - */ -struct Incoming { - struct sk_buff *skb; /* packet data buffer */ - __be16 sequence; /* sequence number of assembly */ - uint8_t lastpacket, /* number of last packet (from 1) */ - numpackets; /* number of packets in split */ -}; - -/* only needed for RFC1201 */ -struct Outgoing { - struct ArcProto *proto; /* protocol driver that owns this: - * if NULL, no packet is pending. - */ - struct sk_buff *skb; /* buffer from upper levels */ - struct archdr *pkt; /* a pointer into the skb */ - uint16_t length, /* bytes total */ - dataleft, /* bytes left */ - segnum, /* segment being sent */ - numsegs; /* number of segments */ -}; - -struct arcnet_local { - uint8_t config, /* current value of CONFIG register */ - timeout, /* Extended timeout for COM20020 */ - backplane, /* Backplane flag for COM20020 */ - clockp, /* COM20020 clock divider */ - clockm, /* COM20020 clock multiplier flag */ - setup, /* Contents of setup1 register */ - setup2, /* Contents of setup2 register */ - intmask; /* current value of INTMASK register */ - uint8_t default_proto[256]; /* default encap to use for each host */ - int cur_tx, /* buffer used by current transmit, or -1 */ - next_tx, /* buffer where a packet is ready to send */ - cur_rx; /* current receive buffer */ - int lastload_dest, /* can last loaded packet be acked? */ - lasttrans_dest; /* can last TX'd packet be acked? */ - int timed_out; /* need to process TX timeout and drop packet */ - unsigned long last_timeout; /* time of last reported timeout */ - char *card_name; /* card ident string */ - int card_flags; /* special card features */ - - /* On preemtive and SMB a lock is needed */ - spinlock_t lock; - - /* - * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of - * which can be used for either sending or receiving. The new dynamic - * buffer management routines use a simple circular queue of available - * buffers, and take them as they're needed. This way, we simplify - * situations in which we (for example) want to pre-load a transmit - * buffer, or start receiving while we copy a received packet to - * memory. - * - * The rules: only the interrupt handler is allowed to _add_ buffers to - * the queue; thus, this doesn't require a lock. Both the interrupt - * handler and the transmit function will want to _remove_ buffers, so - * we need to handle the situation where they try to do it at the same - * time. - * - * If next_buf == first_free_buf, the queue is empty. Since there are - * only four possible buffers, the queue should never be full. - */ - atomic_t buf_lock; - int buf_queue[5]; - int next_buf, first_free_buf; - - /* network "reconfiguration" handling */ - unsigned long first_recon; /* time of "first" RECON message to count */ - unsigned long last_recon; /* time of most recent RECON */ - int num_recons; /* number of RECONs between first and last. */ - int network_down; /* do we think the network is down? */ - - int excnak_pending; /* We just got an excesive nak interrupt */ - - struct { - uint16_t sequence; /* sequence number (incs with each packet) */ - __be16 aborted_seq; - - struct Incoming incoming[256]; /* one from each address */ - } rfc1201; - - /* really only used by rfc1201, but we'll pretend it's not */ - struct Outgoing outgoing; /* packet currently being sent */ - - /* hardware-specific functions */ - struct { - struct module *owner; - void (*command)(struct net_device *dev, int cmd); - int (*status)(struct net_device *dev); - void (*intmask)(struct net_device *dev, int mask); - int (*reset)(struct net_device *dev, int really_reset); - void (*open)(struct net_device *dev); - void (*close)(struct net_device *dev); - - void (*copy_to_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - } hw; - - void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -}; - -#define ARCRESET(x) (lp->hw.reset(dev, (x))) -#define ACOMMAND(x) (lp->hw.command(dev, (x))) -#define ASTATUS() (lp->hw.status(dev)) -#define AINTMASK(x) (lp->hw.intmask(dev, (x))) - -#if ARCNET_DEBUG_MAX & D_SKB -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); -#else -static inline -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) -{ -} -#endif - -void arcnet_unregister_proto(struct ArcProto *proto); -irqreturn_t arcnet_interrupt(int irq, void *dev_id); -struct net_device *alloc_arcdev(const char *name); - -int arcnet_open(struct net_device *dev); -int arcnet_close(struct net_device *dev); -netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); -void arcnet_timeout(struct net_device *dev); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_ARCDEVICE_H */ diff --git a/include/linux/com20020.h b/include/linux/com20020.h deleted file mode 100644 index 85898995b234..000000000000 --- a/include/linux/com20020.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Linux ARCnet driver - COM20020 chipset support - function declarations - * - * Written 1997 by David Woodhouse. - * Written 1994-1999 by Avery Pennarun. - * Derived from skeleton.c by Donald Becker. - * - * Special thanks to Contemporary Controls, Inc. (www.ccontrols.com) - * for sponsoring the further development of this driver. - * - * ********************** - * - * The original copyright of skeleton.c was as follows: - * - * skeleton.c Written 1993 by Donald Becker. - * Copyright 1993 United States Government as represented by the - * Director, National Security Agency. This software may only be used - * and distributed according to the terms of the GNU General Public License as - * modified by SRC, incorporated herein by reference. - * - * ********************** - * - * For more details, see drivers/net/arcnet.c - * - * ********************** - */ -#ifndef __COM20020_H -#define __COM20020_H - -int com20020_check(struct net_device *dev); -int com20020_found(struct net_device *dev, int shared); -extern const struct net_device_ops com20020_netdev_ops; - -/* The number of low I/O ports used by the card. */ -#define ARCNET_TOTAL_SIZE 8 - -/* various register addresses */ -#ifdef CONFIG_SA1100_CT6001 -#define BUS_ALIGN 2 /* 8 bit device on a 16 bit bus - needs padding */ -#else -#define BUS_ALIGN 1 -#endif - -#define PLX_PCI_MAX_CARDS 2 - -struct com20020_pci_channel_map { - u32 bar; - u32 offset; - u32 size; /* 0x00 - auto, e.g. length of entire bar */ -}; - -struct com20020_pci_card_info { - const char *name; - int devcount; - - struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS]; - - unsigned int flags; -}; - -struct com20020_priv { - struct com20020_pci_card_info *ci; - struct list_head list_dev; -}; - -struct com20020_dev { - struct list_head list; - struct net_device *dev; - - struct com20020_priv *pci_priv; - int index; -}; - -#define _INTMASK (ioaddr+BUS_ALIGN*0) /* writable */ -#define _STATUS (ioaddr+BUS_ALIGN*0) /* readable */ -#define _COMMAND (ioaddr+BUS_ALIGN*1) /* standard arcnet commands */ -#define _DIAGSTAT (ioaddr+BUS_ALIGN*1) /* diagnostic status register */ -#define _ADDR_HI (ioaddr+BUS_ALIGN*2) /* control registers for IO-mapped memory */ -#define _ADDR_LO (ioaddr+BUS_ALIGN*3) -#define _MEMDATA (ioaddr+BUS_ALIGN*4) /* data port for IO-mapped memory */ -#define _SUBADR (ioaddr+BUS_ALIGN*5) /* the extended port _XREG refers to */ -#define _CONFIG (ioaddr+BUS_ALIGN*6) /* configuration register */ -#define _XREG (ioaddr+BUS_ALIGN*7) /* extra registers (indexed by _CONFIG - or _SUBADR) */ - -/* in the ADDR_HI register */ -#define RDDATAflag 0x80 /* next access is a read (not a write) */ - -/* in the DIAGSTAT register */ -#define NEWNXTIDflag 0x02 /* ID to which token is passed has changed */ - -/* in the CONFIG register */ -#define RESETcfg 0x80 /* put card in reset state */ -#define TXENcfg 0x20 /* enable TX */ - -/* in SETUP register */ -#define PROMISCset 0x10 /* enable RCV_ALL */ -#define P1MODE 0x80 /* enable P1-MODE for Backplane */ -#define SLOWARB 0x01 /* enable Slow Arbitration for >=5Mbps */ - -/* COM2002x */ -#define SUB_TENTATIVE 0 /* tentative node ID */ -#define SUB_NODE 1 /* node ID */ -#define SUB_SETUP1 2 /* various options */ -#define SUB_TEST 3 /* test/diag register */ - -/* COM20022 only */ -#define SUB_SETUP2 4 /* sundry options */ -#define SUB_BUSCTL 5 /* bus control options */ -#define SUB_DMACOUNT 6 /* DMA count options */ - -#define SET_SUBADR(x) do { \ - if ((x) < 4) \ - { \ - lp->config = (lp->config & ~0x03) | (x); \ - SETCONF; \ - } \ - else \ - { \ - outb(x, _SUBADR); \ - } \ -} while (0) - -#undef ARCRESET -#undef ASTATUS -#undef ACOMMAND -#undef AINTMASK - -#define ARCRESET { outb(lp->config | 0x80, _CONFIG); \ - udelay(5); \ - outb(lp->config , _CONFIG); \ - } -#define ARCRESET0 { outb(0x18 | 0x80, _CONFIG); \ - udelay(5); \ - outb(0x18 , _CONFIG); \ - } - -#define ASTATUS() inb(_STATUS) -#define ADIAGSTATUS() inb(_DIAGSTAT) -#define ACOMMAND(cmd) outb((cmd),_COMMAND) -#define AINTMASK(msk) outb((msk),_INTMASK) - -#define SETCONF outb(lp->config, _CONFIG) - -#endif /* __COM20020_H */ -- cgit v1.2.3 From 2d8bff12699abc3a9bf886bb0b79f44d94d81496 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 23 Sep 2015 14:57:58 -0400 Subject: netpoll: Close race condition between poll_one_napi and napi_disable Drivers might call napi_disable while not holding the napi instance poll_lock. In those instances, its possible for a race condition to exist between poll_one_napi and napi_disable. That is to say, poll_one_napi only tests the NAPI_STATE_SCHED bit to see if there is work to do during a poll, and as such the following may happen: CPU0 CPU1 ndo_tx_timeout napi_poll_dev napi_disable poll_one_napi test_and_set_bit (ret 0) test_bit (ret 1) reset adapter napi_poll_routine If the adapter gets a tx timeout without a napi instance scheduled, its possible for the adapter to think it has exclusive access to the hardware (as the napi instance is now scheduled via the napi_disable call), while the netpoll code thinks there is simply work to do. The result is parallel hardware access leading to corrupt data structures in the driver, and a crash. Additionaly, there is another, more critical race between netpoll and napi_disable. The disabled napi state is actually identical to the scheduled state for a given napi instance. The implication being that, if a napi instance is disabled, a netconsole instance would see the napi state of the device as having been scheduled, and poll it, likely while the driver was dong something requiring exclusive access. In the case above, its fairly clear that not having the rings in a state ready to be polled will cause any number of crashes. The fix should be pretty easy. netpoll uses its own bit to indicate that that the napi instance is in a state of being serviced by netpoll (NAPI_STATE_NPSVC). We can just gate disabling on that bit as well as the sched bit. That should prevent netpoll from conducting a napi poll if we convert its set bit to a test_and_set_bit operation to provide mutual exclusion Change notes: V2) Remove a trailing whtiespace Resubmit with proper subject prefix V3) Clean up spacing nits Signed-off-by: Neil Horman CC: "David S. Miller" CC: jmaxwell@redhat.com Tested-by: jmaxwell@redhat.com Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88a00694eda5..2d15e3831440 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -507,6 +507,7 @@ static inline void napi_enable(struct napi_struct *n) BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); smp_mb__before_atomic(); clear_bit(NAPI_STATE_SCHED, &n->state); + clear_bit(NAPI_STATE_NPSVC, &n->state); } #ifdef CONFIG_SMP -- cgit v1.2.3 From a79e88d9fbbe2e3ecb9d883fb59dca7468d42d79 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 23 Sep 2015 08:39:16 -0700 Subject: bridge: define some min/max/default ageing time constants Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00beed2..a338a688ee4a 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,12 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (1000000 * HZ) + +#define BR_DEFAULT_AGEING_TIME (300 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); -- cgit v1.2.3 From f856f21dbcd162a53e30987a91d75d5ab54a7f80 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Sep 2015 09:37:10 +0200 Subject: ieee802154: remove unnecessary includes This patch removes some unnecessary includes from ieee802154 header, which was introduced by commit b609fb54adfa ("ieee802154: add helpers for frame control checks"). Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index aca228b81464..d3e415674dac 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,9 +25,6 @@ #include #include -#include -#include -#include #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 -- cgit v1.2.3 From 9badce000e2ce68ba74838a3cd356dde58221c2f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Sep 2015 17:07:29 -0400 Subject: cgroup, writeback: don't enable cgroup writeback on traditional hierarchies inode_cgwb_enabled() gates cgroup writeback support. If it returns true, each inode is attached to the corresponding memory domain which gets mapped to io domain. It currently only tests whether the filesystem and bdi support cgroup writeback; however, cgroup writeback support doesn't work on traditional hierarchies and thus it should also test whether memcg and iocg are on the default hierarchy. This caused traditional hierarchy setups to hit the cgroup writeback path inadvertently and ended up creating separate writeback domains for each memcg and mapping them all to the root iocg uncovering a couple issues in the cgroup writeback path. cgroup writeback was never meant to be enabled on traditional hierarchies. Make inode_cgwb_enabled() test whether both memcg and iocg are on the default hierarchy. Signed-off-by: Tejun Heo Reported-by: Artem Bityutskiy Reported-by: Dexuan Cui Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Link: http://lkml.kernel.org/g/f30d4a6aa8a546ff88f73021d026a453@SIXPR30MB031.064d.mgd.msft.net --- include/linux/backing-dev.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5a5d79ee256f..d5eb4ad1c534 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -252,13 +253,19 @@ int inode_congested(struct inode *inode, int cong_bits); * @inode: inode of interest * * cgroup writeback requires support from both the bdi and filesystem. - * Test whether @inode has both. + * Also, both memcg and iocg have to be on the default hierarchy. Test + * whether all conditions are met. + * + * Note that the test result may change dynamically on the same inode + * depending on how memcg and iocg are configured. */ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return bdi_cap_account_dirty(bdi) && + return cgroup_on_dfl(mem_cgroup_root_css->cgroup) && + cgroup_on_dfl(blkcg_root_css->cgroup) && + bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v1.2.3 From 6ae459bdaaeebc632b16e54dcbabb490c6931d61 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 22 Sep 2015 12:57:53 -0700 Subject: skbuff: Fix skb checksum flag on skb pull VXLAN device can receive skb with checksum partial. But the checksum offset could be in outer header which is pulled on receive. This results in negative checksum offset for the skb. Such skb can cause the assert failure in skb_checksum_help(). Following patch fixes the bug by setting checksum-none while pulling outer header. Following is the kernel panic msg from old kernel hitting the bug. ------------[ cut here ]------------ kernel BUG at net/core/dev.c:1906! RIP: 0010:[] skb_checksum_help+0x144/0x150 Call Trace: [] queue_userspace_packet+0x408/0x470 [openvswitch] [] ovs_dp_upcall+0x5d/0x60 [openvswitch] [] ovs_dp_process_packet_with_key+0xe6/0x100 [openvswitch] [] ovs_dp_process_received_packet+0x4b/0x80 [openvswitch] [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] vxlan_rcv+0x53/0x60 [openvswitch] [] vxlan_udp_encap_recv+0x8b/0xf0 [openvswitch] [] udp_queue_rcv_skb+0x2dc/0x3b0 [] __udp4_lib_rcv+0x1cf/0x6c0 [] udp_rcv+0x1a/0x20 [] ip_local_deliver_finish+0xdd/0x280 [] ip_local_deliver+0x88/0x90 [] ip_rcv_finish+0x10d/0x370 [] ip_rcv+0x235/0x300 [] __netif_receive_skb+0x55d/0x620 [] netif_receive_skb+0x80/0x90 [] virtnet_poll+0x555/0x6f0 [] net_rx_action+0x134/0x290 [] __do_softirq+0xa8/0x210 [] call_softirq+0x1c/0x30 [] do_softirq+0x65/0xa0 [] irq_exit+0x8e/0xb0 [] do_IRQ+0x63/0xe0 [] common_interrupt+0x6e/0x6e Reported-by: Anupam Chanda Signed-off-by: Pravin B Shelar Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9987af080fa0..2b0a30a6e31c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2707,6 +2707,9 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) <= len) + skb->ip_summed = CHECKSUM_NONE; } unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); -- cgit v1.2.3 From 3e3aaf649416988ca8be4ad2c52dc24d8be7b46e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:02 +0100 Subject: phy: fix mdiobus module safety Re-implement the mdiobus module refcounting to ensure that we actually ensure that the mdiobus module code does not go away while we might call into it. The old scheme using bus->dev.driver was buggy, because bus->dev is a class device which never has a struct device_driver associated with it, and hence the associated code trying to obtain a refcount did nothing useful. Instead, take the approach that other subsystems do: pass the module when calling mdiobus_register(), and record that in the mii_bus struct. When we need to increment the module use count in the phy code, use this stored pointer. When the phy is deteched, drop the module refcount, remembering that the phy device might go away at that point. This doesn't stop the mii_bus going away while there are in-use phys - it merely stops the underlying code vanishing. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 962387a192f1..11bce44f6d65 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,7 @@ struct sk_buff; * PHYs should register using this structure */ struct mii_bus { + struct module *owner; const char *name; char id[MII_BUS_ID_SIZE]; void *priv; @@ -198,7 +200,8 @@ static inline struct mii_bus *mdiobus_alloc(void) return mdiobus_alloc_size(0); } -int mdiobus_register(struct mii_bus *bus); +int __mdiobus_register(struct mii_bus *bus, struct module *owner); +#define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); -- cgit v1.2.3 From 38737e490d4ea91660d3cec83ef88c4e6d360ae4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Sep 2015 20:36:28 +0100 Subject: phy: add phy_device_remove() Add a phy_device_remove() function to complement phy_device_register(), which undoes the effects of phy_device_register() by removing the phy device from visibility, but not freeing it. This allows these details to be moved out of the mdio bus code into the phy code where this action belongs. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 11bce44f6d65..4a4e3a092337 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -745,6 +745,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct phy_c45_device_ids *c45_ids); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); +void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); -- cgit v1.2.3 From c6790aa9f4fdc26b1246ba36da2fd749663beb65 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:23 +0300 Subject: IB/mlx5: Remove support for IB_DEVICE_LOCAL_DMA_LKEY Commit 96249d70dd70 ("IB/core: Guarantee that a local_dma_lkey is available") allows ULPs that make use of the local dma key to keep working as before by allocating a DMA MR with local permissions and converted these consumers to use the MR associated with the PD rather then device->local_dma_lkey. ConnectIB has some known issues with memory registration using the local_dma_lkey (SEND, RDMA, RECV seems to work ok). Thus don't expose support for it (remove device->local_dma_lkey setting), and take advantage of the above commit such that no regression is introduced to working systems. The local_dma_lkey support will be restored in CX4 depending on FW capability query. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 11 ----------- include/linux/mlx5/driver.h | 1 - 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_cmd_query_special_contexts_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_special_contexts_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 resd_lkey; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27b53f9a24ad..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From 5ebc76035303016ec41bb752bec156ea9fde7c34 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:45 +0800 Subject: ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ Avoid IRQs occupied by ISA IRQs when allocating IRQs for PCI link devices, otherwise it may cause interrupt storm due to incompatible pin attributes. This issue was triggered on a KVM virtual machine, which 1) uses IRQ9 for SCI in high level mode. 2) defines an PCI interrupt link device (LNKS) with IRQ9 as the only possible irq. 3) has an PCI device referring to link device LNKS. So it causes interrupt storm when enabling the PCI device because PCI IRQ works in low level mode. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..43856d19cf4d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); -- cgit v1.2.3 From 78ccb25861d76a8fc5c678d762180e6918834200 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 25 Sep 2015 10:49:15 +0300 Subject: net/mlx5_core: Fix wrong name in struct The name refers to syndrome so uset ext_synd instread of ext_sync. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..41e9f3bd663c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -440,7 +440,7 @@ struct health_buffer { __be32 rsvd2; u8 irisc_index; u8 synd; - __be16 ext_sync; + __be16 ext_synd; }; struct mlx5_init_seg { -- cgit v1.2.3 From 55acca90da52b85299c033354e51ddaa7b73e019 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Fri, 18 Sep 2015 22:08:17 +0200 Subject: brcmfmac: Add support for the BCM4365 and BCM4366 PCIE devices. This patch adds support for the BCM4365 and BCM4366 11ac Wave2 PCIE devices. Reviewed-by: Arend Van Spriel Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 2ff4a9961e1d..3feb1b2d75d8 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_ARM_CA7 0x847 +#define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 -- cgit v1.2.3 From d815d90bbbc08777c0e3a36f57b97fc4a4fb3150 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:28 -0500 Subject: netfilter: Push struct net down into nf_afinfo.reroute The network namespace is needed when routing a packet. Stop making nf_afinfo.reroute guess which network namespace is the proper namespace to route the packet in. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 987c74cd523c..165ab2d14734 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -283,7 +283,7 @@ struct nf_afinfo { struct flowi *fl, bool strict); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); - int (*reroute)(struct sk_buff *skb, + int (*reroute)(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry); int route_key_size; }; -- cgit v1.2.3 From e45f50660ee5fd38a540afabb7c0f65d063db631 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:30 -0500 Subject: ipv4: Pass struct net into ip_route_me_harder Don't make ip_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 6e4591bb54d4..98c03b2462b5 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -6,7 +6,7 @@ #include -int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__LINUX_IP_NETFILTER_H*/ -- cgit v1.2.3 From 5f5d74d723146c5b97c7318b5851af15b30e3304 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:31 -0500 Subject: ipv6: Pass struct net into ip6_route_me_harder Don't make ip6_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: Eric W. Biederman Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 771574677e83..2ac8369fa96c 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -22,7 +22,7 @@ struct nf_ipv6_ops { }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); -- cgit v1.2.3 From 2094acbb714e24e464c810c2d8fa57493fcb25a6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 28 Sep 2015 11:10:31 -0700 Subject: net/ipv4: Pass proto as u8 instead of u16 in ip_check_mc_rcu This patch updates ip_check_mc_rcu so that protocol is passed as a u8 instead of a u16. The motivation is just to avoid any unneeded type transitions since some systems will require an instruction to zero extend a u8 field to a u16. Also it makes it a bit more readable as to the fact that protocol is a u8 so there are no byte ordering changes needed to pass it. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 908429216d9f..9c9de11549a7 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -110,7 +110,7 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) -extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); +extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); -- cgit v1.2.3 From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } -- cgit v1.2.3 From 0536fcc039a8926ec12ec587f41a83f7acafeb82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Sep 2015 07:42:52 -0700 Subject: tcp: prepare fastopen code for upcoming listener changes While auditing TCP stack for upcoming 'lockless' listener changes, I found I had to change fastopen_init_queue() to properly init the object before publishing it. Otherwise an other cpu could try to lock the spinlock before it gets properly initialized. Instead of adding appropriate barriers, just remove dynamic memory allocations : - Structure is 28 bytes on 64bit arches. Using additional 8 bytes for holding a pointer seems overkill. - Two listeners can share same cache line and performance would suffer. If we really want to save few bytes, we would instead dynamically allocate whole struct request_sock_queue in the future. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fcb573be75d9..e442e6e9a365 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -382,25 +382,11 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) tcp_sk(sk)->fastopen_rsk != NULL); } -extern void tcp_sock_destruct(struct sock *sk); - -static inline int fastopen_init_queue(struct sock *sk, int backlog) +static inline void fastopen_queue_tune(struct sock *sk, int backlog) { - struct request_sock_queue *queue = - &inet_csk(sk)->icsk_accept_queue; - - if (queue->fastopenq == NULL) { - queue->fastopenq = kzalloc( - sizeof(struct fastopen_queue), - sk->sk_allocation); - if (queue->fastopenq == NULL) - return -ENOMEM; - - sk->sk_destruct = tcp_sock_destruct; - spin_lock_init(&queue->fastopenq->lock); - } - queue->fastopenq->max_qlen = backlog; - return 0; + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + queue->fastopenq.max_qlen = backlog; } static inline void tcp_saved_syn_free(struct tcp_sock *tp) -- cgit v1.2.3 From 007979eaf94d1c888d8c7cf8a5250c2c6c9bd98e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:10 -0700 Subject: net: Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER and update the name of the netif_is_vrf and netif_index_is_vrf macros. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d2ffeafc9998..99c33e83822f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1258,7 +1258,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device - * @IFF_VRF_MASTER: device is a VRF master + * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master */ @@ -1283,7 +1283,7 @@ enum netdev_priv_flags { IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_IPVLAN_MASTER = 1<<18, IFF_IPVLAN_SLAVE = 1<<19, - IFF_VRF_MASTER = 1<<20, + IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, }; @@ -1308,7 +1308,7 @@ enum netdev_priv_flags { #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE -#define IFF_VRF_MASTER IFF_VRF_MASTER +#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH @@ -3824,9 +3824,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } -static inline bool netif_is_vrf(const struct net_device *dev) +static inline bool netif_is_l3_master(const struct net_device *dev) { - return dev->priv_flags & IFF_VRF_MASTER; + return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_bridge_master(const struct net_device *dev) @@ -3839,7 +3839,7 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_vrf(struct net *net, int ifindex) +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { bool rc = false; @@ -3853,7 +3853,7 @@ static inline bool netif_index_is_vrf(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); if (dev) - rc = netif_is_vrf(dev); + rc = netif_is_l3_master(dev); rcu_read_unlock(); #endif -- cgit v1.2.3 From 1b69c6d0ae90b7f1a4f61d5c8209d5cb7a55f849 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:11 -0700 Subject: net: Introduce L3 Master device abstraction L3 master devices allow users of the abstraction to influence FIB lookups for enslaved devices. Current API provides a means for the master device to return a specific FIB table for an enslaved device, to return an rtable/custom dst and influence the OIF used for fib lookups. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99c33e83822f..c7f14794fe14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1587,6 +1587,9 @@ struct net_device { #ifdef CONFIG_NET_SWITCHDEV const struct switchdev_ops *switchdev_ops; #endif +#ifdef CONFIG_NET_L3_MASTER_DEV + const struct l3mdev_ops *l3mdev_ops; +#endif const struct header_ops *header_ops; -- cgit v1.2.3 From 93a7e7e837af6846052481da974320c19ab82e5c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:16 -0700 Subject: net: Remove the now unused vrf_ptr Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7f14794fe14..72bf9e37a2f0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1427,7 +1427,6 @@ enum netdev_priv_flags { * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data - * @vrf_ptr: VRF specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * * @last_rx: Time of last Rx @@ -1649,7 +1648,6 @@ struct net_device { struct dn_dev __rcu *dn_ptr; struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; - struct net_vrf_dev __rcu *vrf_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) -- cgit v1.2.3 From 9478d12d33ad12d29c5343ae7346b51bc1f4c5a9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:18 -0700 Subject: net: Move netif_index_is_l3_master to l3mdev.h Change CONFIG dependency to CONFIG_NET_L3_MASTER_DEV as well. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 72bf9e37a2f0..b9450784ae06 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3840,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - bool rc = false; - -#if IS_ENABLED(CONFIG_NET_VRF) - struct net_device *dev; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); -#endif - return rc; -} - /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3 From 7d8c6e391575ee86c870b88635a163743fca9eac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jun 2015 22:12:04 -0500 Subject: ipv6: Pass struct net through ip6_fragment Signed-off-by: Eric W. Biederman --- include/linux/netfilter_ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 2ac8369fa96c..47c6b04c28c0 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -17,8 +17,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); }; #ifdef CONFIG_NETFILTER -- cgit v1.2.3 From 0610c25daa3e76e38ad5a8fae683a89ff9f71798 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:02 -0700 Subject: memcg: fix dirty page migration The problem starts with a file backed dirty page which is charged to a memcg. Then page migration is used to move oldpage to newpage. Migration: - copies the oldpage's data to newpage - clears oldpage.PG_dirty - sets newpage.PG_dirty - uncharges oldpage from memcg - charges newpage to memcg Clearing oldpage.PG_dirty decrements the charged memcg's dirty page count. However, because newpage is not yet charged, setting newpage.PG_dirty does not increment the memcg's dirty page count. After migration completes newpage.PG_dirty is eventually cleared, often in account_page_cleaned(). At this time newpage is charged to a memcg so the memcg's dirty page count is decremented which causes underflow because the count was not previously incremented by migration. This underflow causes balance_dirty_pages() to see a very large unsigned number of dirty memcg pages which leads to aggressive throttling of buffered writes by processes in non root memcg. This issue: - can harm performance of non root memcg buffered writes. - can report too small (even negative) values in memory.stat[(total_)dirty] counters of all memcg, including the root. To avoid polluting migrate.c with #ifdef CONFIG_MEMCG checks, introduce page_memcg() and set_page_memcg() helpers. Test: 0) setup and enter limited memcg mkdir /sys/fs/cgroup/test echo 1G > /sys/fs/cgroup/test/memory.limit_in_bytes echo $$ > /sys/fs/cgroup/test/cgroup.procs 1) buffered writes baseline dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat 2) buffered writes with compaction antagonist to induce migration yes 1 > /proc/sys/vm/compact_memory & rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k kill % sync grep ^dirty /sys/fs/cgroup/test/memory.stat 3) buffered writes without antagonist, should match baseline rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat (speed, dirty residue) unpatched patched 1) 841 MB/s 0 dirty pages 886 MB/s 0 dirty pages 2) 611 MB/s -33427456 dirty pages 793 MB/s 0 dirty pages 3) 114 MB/s -33427456 dirty pages 891 MB/s 0 dirty pages Notice that unpatched baseline performance (1) fell after migration (3): 841 -> 114 MB/s. In the patched kernel, post migration performance matches baseline. Fixes: c4843a7593a9 ("memcg: add per cgroup dirty page accounting") Signed-off-by: Greg Thelen Reported-by: Dave Hansen Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6f0dc9..80001de019ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ -- cgit v1.2.3 From ef510194cefe0cd369ef73419cd65b0a5bb4fb5b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:13 -0700 Subject: memcg: remove pcp_counter_lock Commit 733a572e66d2 ("memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online") removed the last use of the per memcg pcp_counter_lock but forgot to remove the variable. Kill the vestigial variable. Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..6452ff4c463f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; -- cgit v1.2.3 From a91263d520246b63c63e75ddfb072ee6a853fe15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:50 +0200 Subject: ebpf: migrate bpf_prog's flags to bitfield As we need to add further flags to the bpf_prog structure, lets migrate both bools to a bitfield representation. The size of the base structure (excluding insns) remains unchanged at 40 bytes. Add also tags for the kmemchecker, so that it doesn't throw false positives. Even in case gcc would generate suboptimal code, it's not being accessed in performance critical paths. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa2cab985e57..bad618f316d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -326,8 +326,10 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - bool jited; /* Is our filter JIT'ed? */ - bool gpl_compatible; /* Is our filter GPL compatible? */ + kmemcheck_bitfield_begin(meta); + u16 jited:1, /* Is our filter JIT'ed? */ + gpl_compatible:1; /* Is filter GPL compatible? */ + kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ -- cgit v1.2.3 From c46646d0484f5d08e2bede9b45034ba5b8b489cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:51 +0200 Subject: sched, bpf: add helper for retrieving routing realms Using routing realms as part of the classifier is quite useful, it can be viewed as a tag for one or multiple routing entries (think of an analogy to net_cls cgroup for processes), set by user space routing daemons or via iproute2 as an indicator for traffic classifiers and later on processed in the eBPF program. Unlike actions, the classifier can inspect device flags and enable netif_keep_dst() if necessary. tc actions don't have that possibility, but in case people know what they are doing, it can be used from there as well (e.g. via devs that must keep dsts by design anyway). If a realm is set, the handler returns the non-zero realm. User space can set the full 32bit realm for the dst. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bad618f316d7..3d5fd24b321b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,7 +328,8 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ - gpl_compatible:1; /* Is filter GPL compatible? */ + gpl_compatible:1, /* Is filter GPL compatible? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ -- cgit v1.2.3 From f3a6bd393c2c5d0e6b16624ba99a1c5fa07bdb0b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 30 Sep 2015 15:15:52 +0900 Subject: phylib: Add phy_set_max_speed helper Add a helper to allow ethernet drivers to limit the speed of a phy (that they are attached to). This mainly involves factoring out the business-end of of_set_phy_supported() and exporting a new symbol. This code seems to be open coded in several places, in several different variants. It is is envisaged that this will be used in situations where setting the "max-speed" property in DT is not appropriate, e.g. because the maximum speed is not a property of the phy hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4a4e3a092337..4c477e6ece33 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -798,6 +798,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); void phy_device_free(struct phy_device *phydev); +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From e7eadb4de9e645e1b34539dc4128240b1e5f71dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 4 Oct 2015 21:08:09 -0700 Subject: ipv6: inet6_sk() should use sk_fullsock() SYN_RECV & TIMEWAIT sockets are not full blown, they do not have a pinet6 pointer. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f1f32af6d9b9..0ef2a97ccdb5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -264,9 +264,9 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) +static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { - return inet_sk(__sk)->pinet6; + return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) -- cgit v1.2.3 From 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c Mon Sep 17 00:00:00 2001 From: Jon Ringle Date: Thu, 1 Oct 2015 07:43:20 -0400 Subject: regmap: Allow installing custom reg_update_bits function This commit allows installing a custom reg_update_bits function for cases where the hardware provides a mechanism to set or clear register bits without a read/modify/write cycle. Such is the case with the Microchip ENCX24J600. Signed-off-by: Jon Ringle Signed-off-by: David S. Miller --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..4d3a3b1680bb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,9 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +338,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From bab18991871545dfbd10c931eb0fe8f7637156a9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 15:17:33 +0200 Subject: bpf, seccomp: prepare for upcoming criu support The current ongoing effort to dump existing cBPF seccomp filters back to user space requires to hold the pre-transformed instructions like we do in case of socket filters from sk_attach_filter() side, so they can be reloaded in original form at a later point in time by utilities such as criu. To prepare for this, simply extend the bpf_prog_create_from_user() API to hold a flag that tells whether we should store the original or not. Also, fanout filters could make use of that in future for things like diag. While fanout filters already use bpf_prog_destroy(), move seccomp over to them as well to handle original programs when present. Signed-off-by: Daniel Borkmann Cc: Tycho Andersen Cc: Pavel Emelyanov Cc: Kees Cook Cc: Andy Lutomirski Cc: Alexei Starovoitov Tested-by: Tycho Andersen Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3d5fd24b321b..1bbce14bcf17 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -411,7 +411,7 @@ typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans); + bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -- cgit v1.2.3 From 0cdf5640e4f6940bdbbefee4bb0adb7dffb185ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 18:42:00 +0200 Subject: ebpf: include perf_event only where really needed Commit ea317b267e9d ("bpf: Add new bpf map type to store the pointer to struct perf_event") added perf_event.h to the main eBPF header, so it gets included for all users. perf_event.h is actually only needed from array map side, so lets sanitize this a bit. Signed-off-by: Daniel Borkmann Cc: Kaixu Xia Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f57d7fed9ec3..c915a6b54570 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,7 +10,6 @@ #include #include #include -#include struct bpf_map; -- cgit v1.2.3 From 21c4c073f14509d685ed219aa3c76362a7bfa0ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Oct 2015 06:25:43 -0700 Subject: Revert "regmap: Allow installing custom reg_update_bits function" This reverts commit 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c. --- include/linux/regmap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4d3a3b1680bb..8fc0bfd8edc4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,9 +296,6 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); -typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -338,7 +335,6 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; - regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From 77792b11409c9270d98e604b4314b85ce886ac7d Mon Sep 17 00:00:00 2001 From: Jon Ringle Date: Thu, 1 Oct 2015 12:38:07 -0400 Subject: regmap: Allow installing custom reg_update_bits function This commit allows installing a custom reg_update_bits function for cases where the hardware provides a mechanism to set or clear register bits without a read/modify/write cycle. Such is the case with the Microchip ENCX24J600. If a custom reg_update_bits function is provided, it will only be used against volatile registers. Signed-off-by: Jon Ringle Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..b49d4133750e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,8 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +337,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From fee6d4c777a125e56de9370db3b2bf359bf958d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 5 Oct 2015 08:51:24 -0700 Subject: net: Add netif_is_l3_slave IPv6 addrconf keys off of IFF_SLAVE so can not use it for L3 slave. Add a new private flag and add netif_is_l3_slave function for checking it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b9450784ae06..b3374402c1ea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1261,6 +1261,7 @@ struct net_device_ops { * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master + * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1286,6 +1287,7 @@ enum netdev_priv_flags { IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, + IFF_L3MDEV_SLAVE = 1<<23, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev) return dev->priv_flags & IFF_L3MDEV_MASTER; } +static inline bool netif_is_l3_slave(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_SLAVE; +} + static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; -- cgit v1.2.3 From a1cba5613edf50c2a213fa90c30aa10500b241b7 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:48 -0700 Subject: net: phy: Add Broadcom phy library for common interfaces This patch adds the Broadcom phy library to consolidate common interfaces shared by Broadcom phy's. Moved the common interfaces to the 'bcm-phy-lib.c' and updated the Broadcom PHY drivers to use the new APIs. Signed-off-by: Arun Parameswaran Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 697ca7795bd9..6a53ab91407c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -138,7 +138,10 @@ /* 01010: Auto Power-Down */ #define BCM54XX_SHD_APD 0x0a +#define BCM_APD_CLR_MASK 0xFE9F /* clear bits 5, 6 & 8 */ #define BCM54XX_SHD_APD_EN 0x0020 +#define BCM_NO_ANEG_APD_EN 0x0060 /* bits 5 & 6 */ +#define BCM_APD_SINGLELP_EN 0x0100 /* Bit 8 */ #define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ /* LED3 / ~LINKSPD[2] selector */ @@ -209,25 +212,6 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. - */ -static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, - u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - #define BRCM_CL45VEN_EEE_CONTROL 0x803d #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 -- cgit v1.2.3 From 8e185d6997bb67068f0ca8f062a50caa2608cf1b Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:49 -0700 Subject: net: phy: Broadcom Cygnus internal Etherent PHY driver Add support for the Broadcom Cygnus SoCs internal PHY's. The PHYs are 1000M/100M/10M capable with support for 'EEE' and 'APD' (Auto Power Down). This driver supports the following Broadcom Cygnus SoCs: - BCM583XX (BCM58300, BCM58302, BCM58303, BCM58305) - BCM113XX (BCM11300, BCM11320, BCM11350, BCM11360) The PHY's on these SoC's require some workarounds for stable operation, both during configuration time and during suspend/resume. This driver handles the application of the workarounds. Signed-off-by: Arun Parameswaran Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6a53ab91407c..59f4a7304419 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -30,6 +30,8 @@ #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 +#define PHY_ID_BCM_CYGNUS 0xae025200 + #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 @@ -216,4 +218,9 @@ #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 +/* Core register definitions*/ +#define MII_BRCM_CORE_BASE1E 0x1E +#define MII_BRCM_CORE_EXPB0 0xB0 +#define MII_BRCM_CORE_EXPB1 0xB1 + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 46234253b9363894a254844a6550b4cc5f3edfe8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:35 +0200 Subject: net: move net_get_random_once to lib There's no good reason why users outside of networking should not be using this facility, f.e. for initializing their seeds. Therefore, make it accessible from there as get_random_once(). Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/net.h | 21 ++++----------------- include/linux/once.h | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 17 deletions(-) create mode 100644 include/linux/once.h (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 049d4b03c4c4..70ac5e28e6b7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -24,7 +24,8 @@ #include /* For O_CLOEXEC and O_NONBLOCK */ #include #include -#include +#include + #include struct poll_table_struct; @@ -250,22 +251,8 @@ do { \ } while (0) #endif -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key); - -#define net_get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __net_get_random_once(buf, \ - nbytes, \ - &___done, \ - &___once_key); \ - ___ret; \ - }) +#define net_get_random_once(buf, nbytes) \ + get_random_once((buf), (nbytes)) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/once.h b/include/linux/once.h new file mode 100644 index 000000000000..2a83b538dd6a --- /dev/null +++ b/include/linux/once.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_ONCE_H +#define _LINUX_ONCE_H + +#include +#include + +bool __get_random_once(void *buf, int nbytes, bool *done, + struct static_key *once_key); + +#define get_random_once(buf, nbytes) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ + ___ret = __get_random_once((buf), \ + (nbytes), \ + &___done, \ + &___once_key); \ + ___ret; \ + }) + +#endif /* _LINUX_ONCE_H */ -- cgit v1.2.3 From c90aeb948222a7b3d3391d232ec4f50fd8322ad3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:36 +0200 Subject: once: make helper generic for calling functions once Make the get_random_once() helper generic enough, so that functions in general would only be called once, where one user of this is then net_get_random_once(). The only implementation specific call is to get_random_bytes(), all the rest of this *_once() facility would be duplicated among different subsystems otherwise. The new DO_ONCE() helper will be used by prandom() later on, but might also be useful for other scenarios/subsystems as well where a one-time initialization in often-called, possibly fast path code could occur. Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/once.h | 61 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 2a83b538dd6a..285f12cb40e6 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -4,21 +4,54 @@ #include #include -bool __get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key); +bool __do_once_start(bool *done, unsigned long *flags); +void __do_once_done(bool *done, struct static_key *once_key, + unsigned long *flags); -#define get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __get_random_once((buf), \ - (nbytes), \ - &___done, \ - &___once_key); \ - ___ret; \ +/* Call a function exactly once. The idea of DO_ONCE() is to perform + * a function call such as initialization of random seeds, etc, only + * once, where DO_ONCE() can live in the fast-path. After @func has + * been called with the passed arguments, the static key will patch + * out the condition into a nop. DO_ONCE() guarantees type safety of + * arguments! + * + * Not that the following is not equivalent ... + * + * DO_ONCE(func, arg); + * DO_ONCE(func, arg); + * + * ... to this version: + * + * void foo(void) + * { + * DO_ONCE(func, arg); + * } + * + * foo(); + * foo(); + * + * In case the one-time invocation could be triggered from multiple + * places, then a common helper function must be defined, so that only + * a single static key will be placed there! + */ +#define DO_ONCE(func, ...) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) { \ + unsigned long ___flags; \ + ___ret = __do_once_start(&___done, &___flags); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_done(&___done, &___once_key, \ + &___flags); \ + } \ + } \ + ___ret; \ }) +#define get_random_once(buf, nbytes) \ + DO_ONCE(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ -- cgit v1.2.3 From 897ece56e714a2cc64e6914cb89a362d7021b36e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:38 +0200 Subject: random32: add prandom_init_once helper for own rngs Add a prandom_init_once() facility that works on the rnd_state, so that users that are keeping their own state independent from prandom_u32() can initialize their taus113 per cpu states. The motivation here is similar to net_get_random_once(): initialize the state as late as possible in the hope that enough entropy has been collected for the seeding. prandom_init_once() makes use of the recently introduced prandom_seed_full_state() helper and is generic enough so that it could also be used on fast-paths due to the DO_ONCE(). Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/random.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index e651874df2c9..a75840c1aa71 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -7,6 +7,8 @@ #define _LINUX_RANDOM_H #include +#include + #include struct random_ready_callback { @@ -45,6 +47,10 @@ struct rnd_state { u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); + +#define prandom_init_once(pcpu_state) \ + DO_ONCE(prandom_seed_full_state, (pcpu_state)) /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) -- cgit v1.2.3 From 3ad0040573b0c00f88488bc31958acd07a55ee2e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:39 +0200 Subject: bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs While recently arguing on a seccomp discussion that raw prandom_u32() access shouldn't be exposed to unpriviledged user space, I forgot the fact that SKF_AD_RANDOM extension actually already does it for some time in cBPF via commit 4cd3675ebf74 ("filter: added BPF random opcode"). Since prandom_u32() is being used in a lot of critical networking code, lets be more conservative and split their states. Furthermore, consolidate eBPF and cBPF prandom handlers to use the new internal PRNG. For eBPF, bpf_get_prandom_u32() was only accessible for priviledged users, but should that change one day, we also don't want to leak raw sequences through things like eBPF maps. One thought was also to have own per bpf_prog states, but due to ABI reasons this is not easily possible, i.e. the program code currently cannot access bpf_prog itself, and copying the rnd_state to/from the stack scratch space whenever a program uses the prng seems not really worth the trouble and seems too hacky. If needed, taus113 could in such cases be implemented within eBPF using a map entry to keep the state space, or get_random_bytes() could become a second helper in cases where performance would not be critical. Both sides can trigger a one-time late init via prandom_init_once() on the shared state. Performance-wise, there should even be a tiny gain as bpf_user_rnd_u32() saves one function call. The PRNG needs to live inside the BPF core since kernels could have a NET-less config as well. Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Cc: Chema Gonzalez Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c915a6b54570..3697ad563899 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -200,4 +200,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +/* Shared helpers among cBPF and eBPF. */ +void bpf_user_rnd_init_once(void); +u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 020446e01eebc9dbe7eda038e570ab9c7ab13586 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:13:58 +0300 Subject: net/mlx5_core: Prepare cmd interface to system errors handling In preparation to handling system errors at the mlx5_core level, change the interface of cmd_work_handler to accept a 64 bit argument for the vector. This allows to encode a flag that signifies when the handler is called as a result of a driver logic that wishes to terminate commands that the hardware may not be able to terminate. Such command completions are detected at the handler and proper return status is encoded. To be able to terminate page handler commands, we make sure to set the corresponding bit in the bitmask. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8b6d6f2154a4..aa899559eec0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -731,7 +731,7 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar); @@ -865,4 +865,8 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +enum { + MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, +}; + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From ac6ea6e81a80172612e0c9ef93720f371b198918 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:14:00 +0300 Subject: net/mlx5_core: Use private health thread for each device Use a single threaded work queue for each device in the system instead of using one thread for any device. This is required so we can concurrently process system error handling for all the devices that need that. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa899559eec0..41a32873f608 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -391,9 +391,10 @@ struct mlx5_core_health { struct health_buffer __iomem *health; __be32 __iomem *health_counter; struct timer_list timer; - struct list_head list; u32 prev; int miss_counter; + struct workqueue_struct *wq; + struct work_struct work; }; struct mlx5_cq_table { @@ -676,8 +677,8 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); -void mlx5_health_cleanup(void); -void __init mlx5_health_init(void); +void mlx5_health_cleanup(struct mlx5_core_dev *dev); +int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, -- cgit v1.2.3 From 61d03535e4be3a46c1e171a25458237e343195e3 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:54 +0800 Subject: net/netlink: lockdep_genl_is_held can be boolean This patch makes lockdep_genl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 09460d6d6682..a4c61cbce777 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -8,7 +8,7 @@ extern void genl_lock(void); extern void genl_unlock(void); #ifdef CONFIG_LOCKDEP -extern int lockdep_genl_is_held(void); +extern bool lockdep_genl_is_held(void); #endif /* for synchronisation between af_netlink and genetlink */ -- cgit v1.2.3 From 35498edc6481d588feadee7e76220884d5bbca48 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:55 +0800 Subject: net/ieee80211: ieee80211_is_* can be boolean This patch makes ieee80211_is_* return bool to improve readability due to these particular functions only using either one or zero as their return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/ieee80211.h | 76 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f79a02a69d26..dcfb2f43d316 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -121,7 +121,7 @@ #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) -static inline int ieee80211_sn_less(u16 sn1, u16 sn2) +static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } @@ -250,7 +250,7 @@ struct ieee80211_qos_hdr { * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_tods(__le16 fc) +static inline bool ieee80211_has_tods(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_TODS)) != 0; } @@ -259,7 +259,7 @@ static inline int ieee80211_has_tods(__le16 fc) * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_fromds(__le16 fc) +static inline bool ieee80211_has_fromds(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FROMDS)) != 0; } @@ -268,7 +268,7 @@ static inline int ieee80211_has_fromds(__le16 fc) * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_a4(__le16 fc) +static inline bool ieee80211_has_a4(__le16 fc) { __le16 tmp = cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); return (fc & tmp) == tmp; @@ -278,7 +278,7 @@ static inline int ieee80211_has_a4(__le16 fc) * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_morefrags(__le16 fc) +static inline bool ieee80211_has_morefrags(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREFRAGS)) != 0; } @@ -287,7 +287,7 @@ static inline int ieee80211_has_morefrags(__le16 fc) * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_retry(__le16 fc) +static inline bool ieee80211_has_retry(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_RETRY)) != 0; } @@ -296,7 +296,7 @@ static inline int ieee80211_has_retry(__le16 fc) * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_pm(__le16 fc) +static inline bool ieee80211_has_pm(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PM)) != 0; } @@ -305,7 +305,7 @@ static inline int ieee80211_has_pm(__le16 fc) * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_moredata(__le16 fc) +static inline bool ieee80211_has_moredata(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) != 0; } @@ -314,7 +314,7 @@ static inline int ieee80211_has_moredata(__le16 fc) * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_protected(__le16 fc) +static inline bool ieee80211_has_protected(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PROTECTED)) != 0; } @@ -323,7 +323,7 @@ static inline int ieee80211_has_protected(__le16 fc) * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_order(__le16 fc) +static inline bool ieee80211_has_order(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_ORDER)) != 0; } @@ -332,7 +332,7 @@ static inline int ieee80211_has_order(__le16 fc) * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_mgmt(__le16 fc) +static inline bool ieee80211_is_mgmt(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT); @@ -342,7 +342,7 @@ static inline int ieee80211_is_mgmt(__le16 fc) * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ctl(__le16 fc) +static inline bool ieee80211_is_ctl(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL); @@ -352,7 +352,7 @@ static inline int ieee80211_is_ctl(__le16 fc) * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data(__le16 fc) +static inline bool ieee80211_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA); @@ -362,7 +362,7 @@ static inline int ieee80211_is_data(__le16 fc) * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_qos(__le16 fc) +static inline bool ieee80211_is_data_qos(__le16 fc) { /* * mask with QOS_DATA rather than IEEE80211_FCTL_STYPE as we just need @@ -376,7 +376,7 @@ static inline int ieee80211_is_data_qos(__le16 fc) * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_present(__le16 fc) +static inline bool ieee80211_is_data_present(__le16 fc) { /* * mask with 0x40 and test that that bit is clear to only return true @@ -390,7 +390,7 @@ static inline int ieee80211_is_data_present(__le16 fc) * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_req(__le16 fc) +static inline bool ieee80211_is_assoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); @@ -400,7 +400,7 @@ static inline int ieee80211_is_assoc_req(__le16 fc) * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_resp(__le16 fc) +static inline bool ieee80211_is_assoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_RESP); @@ -410,7 +410,7 @@ static inline int ieee80211_is_assoc_resp(__le16 fc) * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_req(__le16 fc) +static inline bool ieee80211_is_reassoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); @@ -420,7 +420,7 @@ static inline int ieee80211_is_reassoc_req(__le16 fc) * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_resp(__le16 fc) +static inline bool ieee80211_is_reassoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_RESP); @@ -430,7 +430,7 @@ static inline int ieee80211_is_reassoc_resp(__le16 fc) * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_req(__le16 fc) +static inline bool ieee80211_is_probe_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); @@ -440,7 +440,7 @@ static inline int ieee80211_is_probe_req(__le16 fc) * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_resp(__le16 fc) +static inline bool ieee80211_is_probe_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); @@ -450,7 +450,7 @@ static inline int ieee80211_is_probe_resp(__le16 fc) * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_beacon(__le16 fc) +static inline bool ieee80211_is_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); @@ -460,7 +460,7 @@ static inline int ieee80211_is_beacon(__le16 fc) * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_atim(__le16 fc) +static inline bool ieee80211_is_atim(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ATIM); @@ -470,7 +470,7 @@ static inline int ieee80211_is_atim(__le16 fc) * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_disassoc(__le16 fc) +static inline bool ieee80211_is_disassoc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DISASSOC); @@ -480,7 +480,7 @@ static inline int ieee80211_is_disassoc(__le16 fc) * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_auth(__le16 fc) +static inline bool ieee80211_is_auth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); @@ -490,7 +490,7 @@ static inline int ieee80211_is_auth(__le16 fc) * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_deauth(__le16 fc) +static inline bool ieee80211_is_deauth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DEAUTH); @@ -500,7 +500,7 @@ static inline int ieee80211_is_deauth(__le16 fc) * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_action(__le16 fc) +static inline bool ieee80211_is_action(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -510,7 +510,7 @@ static inline int ieee80211_is_action(__le16 fc) * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back_req(__le16 fc) +static inline bool ieee80211_is_back_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); @@ -520,7 +520,7 @@ static inline int ieee80211_is_back_req(__le16 fc) * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back(__le16 fc) +static inline bool ieee80211_is_back(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK); @@ -530,7 +530,7 @@ static inline int ieee80211_is_back(__le16 fc) * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_pspoll(__le16 fc) +static inline bool ieee80211_is_pspoll(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); @@ -540,7 +540,7 @@ static inline int ieee80211_is_pspoll(__le16 fc) * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_rts(__le16 fc) +static inline bool ieee80211_is_rts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); @@ -550,7 +550,7 @@ static inline int ieee80211_is_rts(__le16 fc) * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cts(__le16 fc) +static inline bool ieee80211_is_cts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); @@ -560,7 +560,7 @@ static inline int ieee80211_is_cts(__le16 fc) * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ack(__le16 fc) +static inline bool ieee80211_is_ack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_ACK); @@ -570,7 +570,7 @@ static inline int ieee80211_is_ack(__le16 fc) * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfend(__le16 fc) +static inline bool ieee80211_is_cfend(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFEND); @@ -580,7 +580,7 @@ static inline int ieee80211_is_cfend(__le16 fc) * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfendack(__le16 fc) +static inline bool ieee80211_is_cfendack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFENDACK); @@ -590,7 +590,7 @@ static inline int ieee80211_is_cfendack(__le16 fc) * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_nullfunc(__le16 fc) +static inline bool ieee80211_is_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); @@ -600,7 +600,7 @@ static inline int ieee80211_is_nullfunc(__le16 fc) * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_qos_nullfunc(__le16 fc) +static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); @@ -624,7 +624,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(__le16 fc) * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder */ -static inline int ieee80211_is_first_frag(__le16 seq_ctrl) +static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } -- cgit v1.2.3 From 875e08294911b3cb8c60416d64d990809421de29 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:56 +0800 Subject: net/nfnetlink: lockdep_nfnl_is_held can be boolean This patch makes lockdep_nfnl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index e955d4730625..249d1bb01e03 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -45,11 +45,11 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING -int lockdep_nfnl_is_held(__u8 subsys_id); +bool lockdep_nfnl_is_held(__u8 subsys_id); #else -static inline int lockdep_nfnl_is_held(__u8 subsys_id) +static inline bool lockdep_nfnl_is_held(__u8 subsys_id) { - return 1; + return true; } #endif /* CONFIG_PROVE_LOCKING */ -- cgit v1.2.3 From d6fbaea5f635216c9861587c4e658086cf3b1b6b Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:57 +0800 Subject: net/can: can_dropped_invalid_skb can be boolean This patch makes can_dropped_invalid_skb return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- include/linux/can/dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 56dcadd83716..735f9f8c4e43 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -78,7 +78,7 @@ struct can_priv { #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline int can_dropped_invalid_skb(struct net_device *dev, +static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) { const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; @@ -94,12 +94,12 @@ static inline int can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; - return 0; + return false; inval_skb: kfree_skb(skb); dev->stats.tx_dropped++; - return 1; + return true; } static inline bool can_is_canfd_skb(const struct sk_buff *skb) -- cgit v1.2.3 From 0c6119d99bf5df9403a688d267537284e9cc8bcb Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:58 +0800 Subject: net/dccp: dccp_list_has_service can be boolean This patch makes dccp_list_has_service return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/dccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 221025423e6c..61d042bbbf60 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -202,16 +202,16 @@ struct dccp_service_list { #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) #define DCCP_SERVICE_CODE_IS_ABSENT 0 -static inline int dccp_list_has_service(const struct dccp_service_list *sl, +static inline bool dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) { if (likely(sl != NULL)) { u32 i = sl->dccpsl_nr; while (i--) if (sl->dccpsl_list[i] == service) - return 1; + return true; } - return 0; + return false; } struct dccp_ackvec; -- cgit v1.2.3 From c3225164cf60ccecce2459dcb5813dd798233f2d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:00 +0800 Subject: net/inetdevice: inet_ifa_match can be boolean This patch makes inet_ifa_match return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a4328cea376a..3b0999e0260f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -171,7 +171,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); -static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) +static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } -- cgit v1.2.3 From f06cc7b284f3dfb2c5decbf9fde711b50a530050 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:01 +0800 Subject: net/inetdevice: bad_mask can be boolean This patch makes bad_mask return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3b0999e0260f..ee971f335a8b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -180,15 +180,15 @@ static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) * Check if a mask is acceptable. */ -static __inline__ int bad_mask(__be32 mask, __be32 addr) +static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) - return 1; + return true; hmask = ntohl(mask); if (hmask & (hmask+1)) - return 1; - return 0; + return true; + return false; } #define for_primary_ifa(in_dev) { struct in_ifaddr *ifa; \ -- cgit v1.2.3 From 0cbf334376d5e82d7a2f5cd234ca4f5d0843f3ea Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:02 +0800 Subject: net/core: lockdep_rtnl_is_held can be boolean This patch makes lockdep_rtnl_is_held return bool due to this particular function only using either one or zero as its return value. In another patch lockdep_is_held is also made return bool. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 39adaa9529eb..4be5048b1fbe 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -33,11 +33,11 @@ extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; #ifdef CONFIG_PROVE_LOCKING -extern int lockdep_rtnl_is_held(void); +extern bool lockdep_rtnl_is_held(void); #else -static inline int lockdep_rtnl_is_held(void) +static inline bool lockdep_rtnl_is_held(void) { - return 1; + return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ -- cgit v1.2.3 From ff936a04e5f28b7e0455be0e7fa91334f89e4b44 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 10:55:41 -0700 Subject: bpf: fix cb access in socket filter programs eBPF socket filter programs may see junk in 'u32 cb[5]' area, since it could have been used by protocol layers earlier. For socket filter programs used in af_packet we need to clean 20 bytes of skb->cb area if it could be used by the program. For programs attached to TCP/UDP sockets we need to save/restore these 20 bytes, since it's used by protocol layers. Remove SK_RUN_FILTER macro, since it's no longer used. Long term we may move this bpf cb area to per-cpu scratch, but that requires addition of new 'per-cpu load/store' instructions, so not suitable as a short term fix. Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") Reported-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 +++--- include/linux/filter.h | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3697ad563899..b4fdee6cb686 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -100,6 +100,8 @@ enum bpf_access_type { BPF_WRITE = 2 }; +struct bpf_prog; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -111,7 +113,7 @@ struct bpf_verifier_ops { u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn); + struct bpf_insn *insn, struct bpf_prog *prog); }; struct bpf_prog_type_list { @@ -120,8 +122,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -struct bpf_prog; - struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; diff --git a/include/linux/filter.h b/include/linux/filter.h index 1bbce14bcf17..4165e9ac9e36 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -302,10 +303,6 @@ struct bpf_prog_aux; bpf_size; \ }) -/* Macro to invoke filter function. */ -#define SK_RUN_FILTER(filter, ctx) \ - (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) - #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -329,6 +326,7 @@ struct bpf_prog { kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ @@ -352,6 +350,39 @@ struct sk_filter { #define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + u8 saved_cb[QDISC_CB_PRIV_LEN]; + u32 res; + + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != + QDISC_CB_PRIV_LEN); + + if (unlikely(prog->cb_access)) { + memcpy(saved_cb, cb_data, sizeof(saved_cb)); + memset(cb_data, 0, sizeof(saved_cb)); + } + + res = BPF_PROG_RUN(prog, skb); + + if (unlikely(prog->cb_access)) + memcpy(cb_data, saved_cb, sizeof(saved_cb)); + + return res; +} + +static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + + if (unlikely(prog->cb_access)) + memset(cb_data, 0, QDISC_CB_PRIV_LEN); + return BPF_PROG_RUN(prog, skb); +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), -- cgit v1.2.3 From 1be7f75d1668d6296b80bf35dcf6762393530afc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:21 -0700 Subject: bpf: enable non-root eBPF programs In order to let unprivileged users load and execute eBPF programs teach verifier to prevent pointer leaks. Verifier will prevent - any arithmetic on pointers (except R10+Imm which is used to compute stack addresses) - comparison of pointers (except if (map_value_ptr == 0) ... ) - passing pointers to helper functions - indirectly passing pointers in stack to helper functions - returning pointer from bpf program - storing pointers into ctx or maps Spill/fill of pointers into stack is allowed, but mangling of pointers stored in the stack or reading them byte by byte is not. Within bpf programs the pointers do exist, since programs need to be able to access maps, pass skb pointer to LD_ABS insns, etc but programs cannot pass such pointer values to the outside or obfuscate them. Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs, so that socket filters (tcpdump), af_packet (quic acceleration) and future kcm can use it. tracing and tc cls/act program types still require root permissions, since tracing actually needs to be able to see all kernel pointers and tc is for root only. For example, the following unprivileged socket filter program is allowed: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += skb->len; return 0; } but the following program is not: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += (u64) skb; return 0; } since it would leak the kernel address into the map. Unprivileged socket filter bpf programs have access to the following helper functions: - map lookup/update/delete (but they cannot store kernel pointers into them) - get_random (it's already exposed to unprivileged user space) - get_smp_processor_id - tail_call into another socket filter program - ktime_get_ns The feature is controlled by sysctl kernel.unprivileged_bpf_disabled. This toggle defaults to off (0), but can be set true (1). Once true, bpf programs and maps cannot be accessed from unprivileged process, and the toggle cannot be set back to false. Signed-off-by: Alexei Starovoitov Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4fdee6cb686..02fa3db3c1ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,6 +167,8 @@ void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); +extern int sysctl_unprivileged_bpf_disabled; + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -- cgit v1.2.3 From aaac3ba95e4c8b496d22f68bd1bc01cfbf525eca Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:22 -0700 Subject: bpf: charge user for creation of BPF maps and programs since eBPF programs and maps use kernel memory consider it 'locked' memory from user accounting point of view and charge it against RLIMIT_MEMLOCK limit. This limit is typically set to 64Kbytes by distros, so almost all bpf+tracing programs would need to increase it, since they use maps, but kernel charges maximum map size upfront. For example the hash map of 1024 elements will be charged as 64Kbyte. It's inconvenient for current users and changes current behavior for root, but probably worth doing to be consistent root vs non-root. Similar accounting logic is done by mmap of perf_event. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/linux/sched.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02fa3db3c1ec..e3a51b74e275 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,8 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u32 pages; + struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; }; @@ -128,6 +130,7 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; + struct user_struct *user; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..4817df5fffae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -840,7 +840,7 @@ struct user_struct { struct hlist_node uidhash_node; kuid_t uid; -#ifdef CONFIG_PERF_EVENTS +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) atomic_long_t locked_vm; #endif }; -- cgit v1.2.3 From d475f090bf1c0dc2999e98bbf2e7cb2243358849 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Oct 2015 19:33:24 -0700 Subject: tcp: shrink tcp_timewait_sock by 8 bytes Reducing tcp_timewait_sock from 280 bytes to 272 bytes allows SLAB to pack 15 objects per page instead of 14 (on x86) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e442e6e9a365..86a7edaa6797 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -356,8 +356,8 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; - u32 tw_rcv_nxt; - u32 tw_snd_nxt; +#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt +#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; -- cgit v1.2.3 From a4288289f585d42a19145f266e214acb165fe9b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:25 +0200 Subject: wireless: update robust action frame list Unprotected DMG and VHT action frames are not protected, reflect that in the list. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dcfb2f43d316..0109f3847e9a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2397,6 +2397,8 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && *category != WLAN_CATEGORY_SELF_PROTECTED && + *category != WLAN_CATEGORY_UNPROT_DMG && + *category != WLAN_CATEGORY_VHT && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From af61426187cd854bffe013ca8547bd8fa3c4dfbf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:26 +0200 Subject: wireless: add WNM action frame categories Add the WNM and unprotected WNM categories and mark the latter as not robust. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0109f3847e9a..452c0b0d2f32 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1932,6 +1932,8 @@ enum ieee80211_category { WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, + WLAN_CATEGORY_WNM = 10, + WLAN_CATEGORY_WNM_UNPROTECTED = 11, WLAN_CATEGORY_TDLS = 12, WLAN_CATEGORY_MESH_ACTION = 13, WLAN_CATEGORY_MULTIHOP_ACTION = 14, @@ -2396,6 +2398,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_WNM_UNPROTECTED && *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && -- cgit v1.2.3